/******************************************************************************
 * gntdev.c
 *
 * Device for accessing (in user-space) pages that have been granted by other
 * domains.
 *
 * Copyright (c) 2006-2007, D G Murray.
 *           (c) 2009 Gerd Hoffmann <kraxel@redhat.com>
 *           (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#undef DEBUG

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>

#include <xen/xen.h>
#include <xen/grant_table.h>
#include <xen/balloon.h>
#include <xen/gntdev.h>
#include <xen/events.h>
#include <xen/page.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include "gntdev-common.h"
#ifdef CONFIG_XEN_GNTDEV_DMABUF
#include "gntdev-dmabuf.h"
#endif

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
	      "Gerd Hoffmann <kraxel@redhat.com>");
MODULE_DESCRIPTION("User-space granted page access driver");

static unsigned int limit = 64*1024;
module_param(limit, uint, 0644);
MODULE_PARM_DESC(limit,
	"Maximum number of grants that may be mapped by one mapping request");

/* True in PV mode, false otherwise */
static int use_ptemod;

static void unmap_grant_pages(struct gntdev_grant_map *map,
			      int offset, int pages);

static struct miscdevice gntdev_miscdev;

/* ------------------------------------------------------------------ */
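
/*
 * Returns true if @count is not an acceptable page count for a single
 * mapping request: either zero or greater than the "limit" module
 * parameter.
 */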
bool gntdev_test_page_count(unsigned int count)
{
	return !count || count > limit;
}

static void gntdev_print_maps(struct gntdev_priv *priv,
			      char *text, int text_index)
{
#ifdef DEBUG
	struct gntdev_grant_map *map;

	pr_debug("%s: maps list (priv %p)\n", __func__, priv);
	list_for_each_entry(map, &priv->maps, next)
		pr_debug("  index %2d, count %2d %s\n",
			 map->index, map->count,
			 map->index == text_index && text ? text : "");
#endif
}
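
/*
 * Release everything gntdev_alloc_map() set up: the pages go back via
 * gnttab_free_pages() (or gnttab_dma_free_pages() for DMA-backed
 * mappings), then the bookkeeping arrays and the map itself are freed.
 */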
static void gntdev_free_map(struct gntdev_grant_map *map)
{
	if (map == NULL)
		return;

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	if (map->dma_vaddr) {
		struct gnttab_dma_alloc_args args;

		args.dev = map->dma_dev;
		args.coherent = !!(map->dma_flags & GNTDEV_DMA_FLAG_COHERENT);
		args.nr_pages = map->count;
		args.pages = map->pages;
		args.frames = map->frames;
		args.vaddr = map->dma_vaddr;
		args.dev_bus_addr = map->dma_bus_addr;

		gnttab_dma_free_pages(&args);
	} else
#endif
	if (map->pages)
		gnttab_free_pages(map->count, map->pages);

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	kvfree(map->frames);
#endif
	kvfree(map->pages);
	kvfree(map->grants);
	kvfree(map->map_ops);
	kvfree(map->unmap_ops);
	kvfree(map->kmap_ops);
	kvfree(map->kunmap_ops);
	kvfree(map->being_removed);
	kfree(map);
}
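
/*
 * Allocate a map able to track @count grants.  Every grant handle is
 * initialized to INVALID_GRANT_HANDLE so that entries which never get
 * mapped are recognizable later.  Returns NULL on any allocation
 * failure; gntdev_free_map() handles partially initialized maps.
 */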
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
					  int dma_flags)
{
	struct gntdev_grant_map *add;
	int i;

	add = kzalloc(sizeof(*add), GFP_KERNEL);
	if (NULL == add)
		return NULL;

	add->grants    = kvmalloc_array(count, sizeof(add->grants[0]),
					GFP_KERNEL);
	add->map_ops   = kvmalloc_array(count, sizeof(add->map_ops[0]),
					GFP_KERNEL);
	add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
					GFP_KERNEL);
	add->pages     = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
	add->being_removed =
		kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
	if (NULL == add->grants    ||
	    NULL == add->map_ops   ||
	    NULL == add->unmap_ops ||
	    NULL == add->pages     ||
	    NULL == add->being_removed)
		goto err;
	if (use_ptemod) {
		add->kmap_ops   = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
						 GFP_KERNEL);
		add->kunmap_ops = kvmalloc_array(count, sizeof(add->kunmap_ops[0]),
						 GFP_KERNEL);
		if (NULL == add->kmap_ops || NULL == add->kunmap_ops)
			goto err;
	}

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	add->dma_flags = dma_flags;

	/*
	 * Check if this mapping is requested to be backed
	 * by a DMA buffer.
	 */
	if (dma_flags & (GNTDEV_DMA_FLAG_WC | GNTDEV_DMA_FLAG_COHERENT)) {
		struct gnttab_dma_alloc_args args;

		add->frames = kvcalloc(count, sizeof(add->frames[0]),
				       GFP_KERNEL);
		if (!add->frames)
			goto err;

		/* Remember the device, so we can free DMA memory. */
		add->dma_dev = priv->dma_dev;

		args.dev = priv->dma_dev;
		args.coherent = !!(dma_flags & GNTDEV_DMA_FLAG_COHERENT);
		args.nr_pages = count;
		args.pages = add->pages;
		args.frames = add->frames;

		if (gnttab_dma_alloc_pages(&args))
			goto err;

		add->dma_vaddr = args.vaddr;
		add->dma_bus_addr = args.dev_bus_addr;
	} else
#endif
	if (gnttab_alloc_pages(count, add->pages))
		goto err;

	for (i = 0; i < count; i++) {
		add->grants[i].domid = DOMID_INVALID;
		add->grants[i].ref = INVALID_GRANT_REF;
		add->map_ops[i].handle = INVALID_GRANT_HANDLE;
		add->unmap_ops[i].handle = INVALID_GRANT_HANDLE;
		if (use_ptemod) {
			add->kmap_ops[i].handle = INVALID_GRANT_HANDLE;
			add->kunmap_ops[i].handle = INVALID_GRANT_HANDLE;
		}
	}

	add->index = 0;
	add->count = count;
	refcount_set(&add->users, 1);

	return add;

err:
	gntdev_free_map(add);
	return NULL;
}
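
/*
 * Insert @add into priv->maps, which is kept sorted by index, assigning
 * it the first free index range that is large enough.  E.g. with maps
 * occupying indices [0,4) and [8,10), a two-grant @add is placed at
 * index 4.
 */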
void gntdev_add_map(struct gntdev_priv *priv, struct gntdev_grant_map *add)
{
	struct gntdev_grant_map *map;

	list_for_each_entry(map, &priv->maps, next) {
		if (add->index + add->count < map->index) {
			list_add_tail(&add->next, &map->next);
			goto done;
		}
		add->index = map->index + map->count;
	}
	list_add_tail(&add->next, &priv->maps);

done:
	gntdev_print_maps(priv, "[new]", add->index);
}
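
/*
 * Look up a map by its starting index.  A non-zero @count must also
 * match the map's size exactly; @count == 0 matches any size.
 */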
static struct gntdev_grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
						      int index, int count)
{
	struct gntdev_grant_map *map;

	list_for_each_entry(map, &priv->maps, next) {
		if (map->index != index)
			continue;
		if (count && map->count != count)
			continue;
		return map;
	}
	return NULL;
}
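
/*
 * Drop one reference to @map.  The final put tears the mapping down:
 * grants that are still mapped are unmapped first (see the re-entrancy
 * comments below), the MMU interval notifier is removed if one was
 * installed, a pending unmap notification event is delivered, and the
 * map is freed.
 */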
void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
{
	if (!map)
		return;

	if (!refcount_dec_and_test(&map->users))
		return;

	if (map->pages && !use_ptemod) {
		/*
		 * Increment the reference count.  This ensures that the
		 * subsequent call to unmap_grant_pages() will not wind up
		 * re-entering itself.  It *can* wind up calling
		 * gntdev_put_map() recursively, but such calls will be with a
		 * reference count greater than 1, so they will return before
		 * this code is reached.  The recursion depth is thus limited to
		 * 1.  Do NOT use refcount_inc() here, as it will detect that
		 * the reference count is zero and WARN().
		 */
		refcount_set(&map->users, 1);

		/*
		 * Unmap the grants.  This may or may not be asynchronous, so it
		 * is possible that the reference count is 1 on return, but it
		 * could also be greater than 1.
		 */
		unmap_grant_pages(map, 0, map->count);

		/* Check if the memory now needs to be freed */
		if (!refcount_dec_and_test(&map->users))
			return;

		/*
		 * All pages have been returned to the hypervisor, so free the
		 * map.
		 */
	}

	if (use_ptemod && map->notifier_init)
		mmu_interval_notifier_remove(&map->notifier);

	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
		notify_remote_via_evtchn(map->notify.event);
		evtchn_put(map->notify.event);
	}
	gntdev_free_map(map);
}

/* ------------------------------------------------------------------ */
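
/*
 * Per-PTE callback for the PV (use_ptemod) path.  It records the
 * machine address of each user-space PTE in map_ops so the hypervisor
 * can write the grant mapping straight into the page table;
 * GNTMAP_contains_pte marks pte_maddr as the address of a PTE rather
 * than of a data page.
 */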
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
	struct gntdev_grant_map *map = data;
	unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
		    (1 << _GNTMAP_guest_avail0);
	u64 pte_maddr;

	BUG_ON(pgnr >= map->count);
	pte_maddr = arbitrary_virt_to_machine(pte).maddr;

	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
			  map->grants[pgnr].ref,
			  map->grants[pgnr].domid);
	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
			    INVALID_GRANT_HANDLE);
	return 0;
}
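
/*
 * Perform the actual grant-mapping hypercalls for @map.  Without
 * use_ptemod the grants are mapped at the kernel addresses of the
 * pre-allocated pages; with use_ptemod the map_ops were already filled
 * by find_grant_ptes() and only the kernel-side kmap_ops are set up
 * here.  Each successfully mapped grant is counted in map->live_grants
 * so that unmapping can be tracked later.
 */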
|
|
|
|
|
2018-07-20 17:01:47 +08:00
|
|
|
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
|
2010-12-15 02:40:46 +08:00
|
|
|
{
|
xen/gntdev: Avoid blocking in unmap_grant_pages()
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be sent to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_map_put(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
2022-06-22 10:27:26 +08:00
|
|
|
size_t alloced = 0;
|
2010-12-15 02:40:46 +08:00
|
|
|
int i, err = 0;
|
2011-02-04 01:19:02 +08:00
|
|
|
|
|
|
|
if (!use_ptemod) {
|
2011-02-10 05:11:32 +08:00
|
|
|
/* Note: it could already be mapped */
|
2021-03-10 18:45:26 +08:00
|
|
|
if (map->map_ops[0].handle != INVALID_GRANT_HANDLE)
|
2011-02-10 05:11:32 +08:00
|
|
|
return 0;
|
2011-02-04 01:19:02 +08:00
|
|
|
for (i = 0; i < map->count; i++) {
|
2011-03-09 00:56:43 +08:00
|
|
|
unsigned long addr = (unsigned long)
|
2011-02-04 01:19:02 +08:00
|
|
|
pfn_to_kaddr(page_to_pfn(map->pages[i]));
|
|
|
|
gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
|
|
|
|
map->grants[i].ref,
|
|
|
|
map->grants[i].domid);
|
|
|
|
gnttab_set_unmap_op(&map->unmap_ops[i], addr,
|
2021-03-10 18:45:26 +08:00
|
|
|
map->flags, INVALID_GRANT_HANDLE);
|
2011-02-04 01:19:02 +08:00
|
|
|
}
|
2011-09-29 18:57:56 +08:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Setup the map_ops corresponding to the pte entries pointing
|
|
|
|
* to the kernel linear addresses of the struct pages.
|
|
|
|
* These ptes are completely different from the user ptes dealt
|
|
|
|
* with find_grant_ptes.
|
2021-02-15 15:51:07 +08:00
|
|
|
* Note that GNTMAP_device_map isn't needed here: The
|
|
|
|
* dev_bus_addr output field gets consumed only from ->map_ops,
|
|
|
|
* and by not requesting it when mapping we also avoid needing
|
|
|
|
* to mirror dev_bus_addr into ->unmap_ops (and holding an extra
|
|
|
|
* reference to the page in the hypervisor).
|
2011-09-29 18:57:56 +08:00
|
|
|
*/
|
2021-02-15 15:51:07 +08:00
|
|
|
unsigned int flags = (map->flags & ~GNTMAP_device_map) |
|
|
|
|
GNTMAP_host_map;
|
|
|
|
|
2011-09-29 18:57:56 +08:00
|
|
|
for (i = 0; i < map->count; i++) {
|
|
|
|
unsigned long address = (unsigned long)
|
|
|
|
pfn_to_kaddr(page_to_pfn(map->pages[i]));
|
|
|
|
BUG_ON(PageHighMem(map->pages[i]));
|
|
|
|
|
2021-02-15 15:51:07 +08:00
|
|
|
gnttab_set_map_op(&map->kmap_ops[i], address, flags,
|
2011-09-29 18:57:56 +08:00
|
|
|
map->grants[i].ref,
|
|
|
|
map->grants[i].domid);
|
2015-01-05 22:13:41 +08:00
|
|
|
gnttab_set_unmap_op(&map->kunmap_ops[i], address,
|
2021-03-10 18:45:26 +08:00
|
|
|
flags, INVALID_GRANT_HANDLE);
|
2011-09-29 18:57:56 +08:00
|
|
|
}
|
2011-02-04 01:19:02 +08:00
|
|
|
}
|
2010-12-15 02:40:46 +08:00
|
|
|
|
|
|
|
pr_debug("map %d+%d\n", map->index, map->count);
|
2021-03-10 18:45:00 +08:00
|
|
|
err = gnttab_map_refs(map->map_ops, map->kmap_ops, map->pages,
|
|
|
|
map->count);
|
2010-12-15 02:40:46 +08:00
|
|
|
|
|
|
|
for (i = 0; i < map->count; i++) {
|
xen/gntdev: Avoid blocking in unmap_grant_pages()
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be sent to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_map_put(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
2022-06-22 10:27:26 +08:00
|
|
|
if (map->map_ops[i].status == GNTST_okay) {
|
2021-02-15 15:52:27 +08:00
|
|
|
map->unmap_ops[i].handle = map->map_ops[i].handle;
|
xen/gntdev: Prevent leaking grants
Prior to this commit, if a grant mapping operation failed partially,
some of the entries in the map_ops array would be invalid, whereas all
of the entries in the kmap_ops array would be valid. This in turn would
cause the following logic in gntdev_map_grant_pages to become invalid:
for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
if (!use_ptemod)
alloced++;
}
if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay) {
if (map->map_ops[i].status == GNTST_okay)
alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
}
}
}
...
atomic_add(alloced, &map->live_grants);
Assume that use_ptemod is true (i.e., the domain mapping the granted
pages is a paravirtualized domain). In the code excerpt above, note that
the "alloced" variable is only incremented when both kmap_ops[i].status
and map_ops[i].status are set to GNTST_okay (i.e., both mapping
operations are successful). However, as also noted above, there are
cases where a grant mapping operation fails partially, breaking the
assumption of the code excerpt above.
The aforementioned causes map->live_grants to be incorrectly set. In
some cases, all of the map_ops mappings fail, but all of the kmap_ops
mappings succeed, meaning that live_grants may remain zero. This in turn
makes it impossible to unmap the successfully grant-mapped pages pointed
to by kmap_ops, because unmap_grant_pages has the following snippet of
code at its beginning:
if (atomic_read(&map->live_grants) == 0)
return; /* Nothing to do */
In other cases where only some of the map_ops mappings fail but all
kmap_ops mappings succeed, live_grants is made positive, but when the
user requests unmapping the grant-mapped pages, __unmap_grant_pages_done
will then make map->live_grants negative, because the latter function
does not check if all of the pages that were requested to be unmapped
were actually unmapped, and the same function unconditionally subtracts
"data->count" (i.e., a value that can be greater than map->live_grants)
from map->live_grants. The side effects of a negative live_grants value
have not been studied.
The net effect of all of this is that grant references are leaked in one
of the above conditions. In Qubes OS v4.1 (which uses Xen's grant
mechanism extensively for X11 GUI isolation), this issue manifests
itself with warning messages like the following to be printed out by the
Linux kernel in the VM that had granted pages (that contain X11 GUI
window data) to dom0: "g.e. 0x1234 still pending", especially after the
user rapidly resizes GUI VM windows (causing some grant-mapping
operations to partially or completely fail, due to the fact that the VM
unshares some of the pages as part of the window resizing, making the
pages impossible to grant-map from dom0).
The fix for this issue involves counting all successful map_ops and
kmap_ops mappings separately, and then adding the sum to live_grants.
During unmapping, only the number of successfully unmapped grants is
subtracted from live_grants. The code is also modified to check for
negative live_grants values after the subtraction and warn the user.
Link: https://github.com/QubesOS/qubes-issues/issues/7631
Fixes: dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
Cc: stable@vger.kernel.org
Signed-off-by: M. Vefa Bicakci <m.v.b@runbox.com>
Acked-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20221002222006.2077-2-m.v.b@runbox.com
Signed-off-by: Juergen Gross <jgross@suse.com>
2022-10-03 06:20:05 +08:00
|
|
|
alloced++;
|
xen/gntdev: Avoid blocking in unmap_grant_pages()
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be sent to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_map_put(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
2022-06-22 10:27:26 +08:00
|
|
|
} else if (!err)
|
2010-12-15 02:40:46 +08:00
|
|
|
err = -EINVAL;
|
2015-01-05 22:13:41 +08:00
|
|
|
|
2021-02-15 15:51:07 +08:00
|
|
|
if (map->flags & GNTMAP_device_map)
|
|
|
|
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
|
|
|
|
|
2021-02-15 15:52:27 +08:00
|
|
|
if (use_ptemod) {
|
xen/gntdev: Avoid blocking in unmap_grant_pages()
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be sent to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_map_put(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
2022-06-22 10:27:26 +08:00
|
|
|
if (map->kmap_ops[i].status == GNTST_okay) {
|
xen/gntdev: Prevent leaking grants
Prior to this commit, if a grant mapping operation failed partially,
some of the entries in the map_ops array would be invalid, whereas all
of the entries in the kmap_ops array would be valid. This in turn would
cause the following logic in gntdev_map_grant_pages to become invalid:
for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
if (!use_ptemod)
alloced++;
}
if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay) {
if (map->map_ops[i].status == GNTST_okay)
alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
}
}
}
...
atomic_add(alloced, &map->live_grants);
Assume that use_ptemod is true (i.e., the domain mapping the granted
pages is a paravirtualized domain). In the code excerpt above, note that
the "alloced" variable is only incremented when both kmap_ops[i].status
and map_ops[i].status are set to GNTST_okay (i.e., both mapping
operations are successful). However, as also noted above, there are
cases where a grant mapping operation fails partially, breaking the
assumption of the code excerpt above.
The aforementioned causes map->live_grants to be incorrectly set. In
some cases, all of the map_ops mappings fail, but all of the kmap_ops
mappings succeed, meaning that live_grants may remain zero. This in turn
makes it impossible to unmap the successfully grant-mapped pages pointed
to by kmap_ops, because unmap_grant_pages has the following snippet of
code at its beginning:
if (atomic_read(&map->live_grants) == 0)
return; /* Nothing to do */
In other cases where only some of the map_ops mappings fail but all
kmap_ops mappings succeed, live_grants is made positive, but when the
user requests unmapping the grant-mapped pages, __unmap_grant_pages_done
will then make map->live_grants negative, because the latter function
does not check if all of the pages that were requested to be unmapped
were actually unmapped, and the same function unconditionally subtracts
"data->count" (i.e., a value that can be greater than map->live_grants)
from map->live_grants. The side effects of a negative live_grants value
have not been studied.
The net effect of all of this is that grant references are leaked in one
of the above conditions. In Qubes OS v4.1 (which uses Xen's grant
mechanism extensively for X11 GUI isolation), this issue manifests
itself with warning messages like the following to be printed out by the
Linux kernel in the VM that had granted pages (that contain X11 GUI
window data) to dom0: "g.e. 0x1234 still pending", especially after the
user rapidly resizes GUI VM windows (causing some grant-mapping
operations to partially or completely fail, due to the fact that the VM
unshares some of the pages as part of the window resizing, making the
pages impossible to grant-map from dom0).
The fix for this issue involves counting all successful map_ops and
kmap_ops mappings separately, and then adding the sum to live_grants.
During unmapping, only the number of successfully unmapped grants is
subtracted from live_grants. The code is also modified to check for
negative live_grants values after the subtraction and warn the user.
Link: https://github.com/QubesOS/qubes-issues/issues/7631
Fixes: dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
Cc: stable@vger.kernel.org
Signed-off-by: M. Vefa Bicakci <m.v.b@runbox.com>
Acked-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20221002222006.2077-2-m.v.b@runbox.com
Signed-off-by: Juergen Gross <jgross@suse.com>
2022-10-03 06:20:05 +08:00
|
|
|
alloced++;
|
2021-02-15 15:52:27 +08:00
|
|
|
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
|
xen/gntdev: Avoid blocking in unmap_grant_pages()
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be sent to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_map_put(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
2022-06-22 10:27:26 +08:00
|
|
|
} else if (!err)
|
2021-02-15 15:52:27 +08:00
|
|
|
err = -EINVAL;
|
|
|
|
}
|
2010-12-15 02:40:46 +08:00
|
|
|
}

	atomic_add(alloced, &map->live_grants);

	return err;
}
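
/*
 * Accounting note (from commit "xen/gntdev: Prevent leaking grants"):
 * grant-map operations can fail partially, so every successful map_ops[]
 * and kmap_ops[] entry is counted individually into live_grants, and the
 * unmap path below subtracts only the unmaps that actually succeeded.
 * Without this, grants could leak ("g.e. ... still pending") or
 * live_grants could go negative.
 */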
static void __unmap_grant_pages_done(int result,
		struct gntab_unmap_queue_data *data)
{
	unsigned int i;
	struct gntdev_grant_map *map = data->data;
	unsigned int offset = data->unmap_ops - map->unmap_ops;
	int successful_unmaps = 0;
	int live_grants;

	for (i = 0; i < data->count; i++) {
		if (map->unmap_ops[offset + i].status == GNTST_okay &&
		    map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
			successful_unmaps++;

		WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
			map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
		pr_debug("unmap handle=%d st=%d\n",
			map->unmap_ops[offset+i].handle,
			map->unmap_ops[offset+i].status);
		map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
		if (use_ptemod) {
			if (map->kunmap_ops[offset + i].status == GNTST_okay &&
			    map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
				successful_unmaps++;

			WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
				map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
			pr_debug("kunmap handle=%u st=%d\n",
				map->kunmap_ops[offset+i].handle,
				map->kunmap_ops[offset+i].status);
			map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
		}
	}

	/*
	 * Decrease the live-grant counter. This must happen after the loop to
	 * prevent premature reuse of the grants by gnttab_mmap().
	 */
	live_grants = atomic_sub_return(successful_unmaps, &map->live_grants);
	if (WARN_ON(live_grants < 0))
		pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n",
		       __func__, live_grants, successful_unmaps);

	/* Release reference taken by __unmap_grant_pages */
	gntdev_put_map(NULL, map);
}
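
/*
 * Unmapping is asynchronous (see commit "xen/gntdev: Avoid blocking in
 * unmap_grant_pages()") so that gntdev never blocks waiting for another
 * driver to release the granted pages; errors therefore surface as the
 * WARN_ONs in __unmap_grant_pages_done() instead of a return value. The
 * map reference taken in __unmap_grant_pages() is dropped by that
 * callback.
 */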
static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
				int pages)
{
	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
		int pgno = (map->notify.addr >> PAGE_SHIFT);

		if (pgno >= offset && pgno < offset + pages) {
			/* No need for kmap, pages are in lowmem */
			uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));

			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
		}
	}

	map->unmap_data.unmap_ops = map->unmap_ops + offset;
	map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
	map->unmap_data.pages = map->pages + offset;
	map->unmap_data.count = pages;
	map->unmap_data.done = __unmap_grant_pages_done;
	map->unmap_data.data = map;
	refcount_inc(&map->users); /* to keep map alive during async call below */

	gnttab_unmap_refs_async(&map->unmap_data);
}
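
/*
 * Since the unmap completes asynchronously, resetting the handles to
 * INVALID_GRANT_HANDLE happens too late to prevent a second unmap of the
 * same handle; the separate map->being_removed[] array marks entries
 * whose unmap is already in flight (see commit "xen/gntdev: Avoid
 * blocking in unmap_grant_pages()").
 */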
static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
			      int pages)
{
	int range;

	if (atomic_read(&map->live_grants) == 0)
		return; /* Nothing to do */

	pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);

	/* It is possible the requested range will have a "hole" where we
	 * already unmapped some of the grants. Only unmap valid ranges.
	 */
	while (pages) {
		while (pages && map->being_removed[offset]) {
			offset++;
			pages--;
		}
		range = 0;
		while (range < pages) {
			if (map->being_removed[offset + range])
				break;
			map->being_removed[offset + range] = true;
			range++;
		}
		if (range)
			__unmap_grant_pages(map, offset, range);
		offset += range;
		pages -= range;
	}
}

/* ------------------------------------------------------------------ */

static void gntdev_vma_open(struct vm_area_struct *vma)
{
	struct gntdev_grant_map *map = vma->vm_private_data;

	pr_debug("gntdev_vma_open %p\n", vma);
	refcount_inc(&map->users);
}

static void gntdev_vma_close(struct vm_area_struct *vma)
{
	struct gntdev_grant_map *map = vma->vm_private_data;
	struct file *file = vma->vm_file;
	struct gntdev_priv *priv = file->private_data;

	pr_debug("gntdev_vma_close %p\n", vma);

	vma->vm_private_data = NULL;
	gntdev_put_map(priv, map);
}
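
/*
 * After a VMA split there can be more than one VMA per grant map, so the
 * open/close hooks above only adjust the map's reference count; the MMU
 * interval notifier is removed at the end of gntdev_put_map(), once the
 * last VMA referencing the map has been closed (see commit "xen/gntdev:
 * Accommodate VMA splitting").
 */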
static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
						 unsigned long addr)
{
	struct gntdev_grant_map *map = vma->vm_private_data;

	return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT];
}

static const struct vm_operations_struct gntdev_vmops = {
	.open = gntdev_vma_open,
	.close = gntdev_vma_close,
	.find_special_page = gntdev_vma_find_special_page,
};

/* ------------------------------------------------------------------ */
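
/*
 * The grant map deliberately stores no VMA pointer, as a split VMA would
 * invalidate it. The original mapping is instead reconstructed from
 * map->pages_vm_start and (map->count << PAGE_SHIFT), which is what
 * gntdev_vma_find_special_page() above and gntdev_invalidate() below rely
 * on (see commit "xen/gntdev: Accommodate VMA splitting").
 */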
static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq)
{
	struct gntdev_grant_map *map =
		container_of(mn, struct gntdev_grant_map, notifier);
	unsigned long mstart, mend;
	unsigned long map_start, map_end;

	if (!mmu_notifier_range_blockable(range))
		return false;

	map_start = map->pages_vm_start;
	map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);

	/*
	 * If the VMA is split or otherwise changed the notifier is not
	 * updated, but we don't want to process VA's outside the modified
	 * VMA. FIXME: It would be much more understandable to just prevent
	 * modifying the VMA in the first place.
	 */
|
2022-10-03 06:20:06 +08:00
|
|
|
if (map_start >= range->end || map_end <= range->start)
|
2019-11-13 04:22:31 +08:00
|
|
|
return true;
|
2018-09-05 07:21:39 +08:00
|
|
|
|
2022-10-03 06:20:06 +08:00
|
|
|
mstart = max(range->start, map_start);
|
|
|
|
mend = min(range->end, map_end);
|
2013-01-03 06:57:12 +08:00
|
|
|
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
|
2022-10-03 06:20:06 +08:00
|
|
|
map->index, map->count, map_start, map_end,
|
|
|
|
range->start, range->end, mstart, mend);
|
|
|
|
unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
|
|
|
|
(mend - mstart) >> PAGE_SHIFT);
|
2018-09-05 07:21:39 +08:00
|
|
|
|
2019-11-13 04:22:31 +08:00
|
|
|
return true;
|
2010-12-15 02:40:46 +08:00
|
|
|
}
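For clarity, a small worked example of the clamping logic above, with made-up addresses: an invalidation range that covers only the second page of a two-page mapping must unmap exactly that page. This is an illustration only, not driver code:

	/* Illustration: clamping for a hypothetical 2-page mapping,
	 * assuming 4 KiB pages (PAGE_SHIFT == 12) and made-up addresses.
	 */
	unsigned long map_start = 0x7f0000000000UL;          /* map->pages_vm_start */
	unsigned long map_end   = map_start + (2UL << 12);   /* map->count == 2     */
	unsigned long r_start   = map_start + 0x1000, r_end = map_end;
	unsigned long mstart    = r_start > map_start ? r_start : map_start;
	unsigned long mend      = r_end   < map_end   ? r_end   : map_end;
	/* unmap offset = (mstart - map_start) >> 12 == 1,
	 * page count   = (mend - mstart)     >> 12 == 1,
	 * so only the second grant page is torn down; the first stays mapped.
	 */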
|
|
|
|
|
2019-11-13 04:22:31 +08:00
|
|
|
static const struct mmu_interval_notifier_ops gntdev_mmu_ops = {
|
|
|
|
.invalidate = gntdev_invalidate,
|
2010-12-15 02:40:46 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* ------------------------------------------------------------------ */
|
|
|
|
|
|
|
|
static int gntdev_open(struct inode *inode, struct file *flip)
|
|
|
|
{
|
|
|
|
struct gntdev_priv *priv;
|
|
|
|
|
|
|
|
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
|
|
|
|
if (!priv)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&priv->maps);
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_init(&priv->lock);
|
2010-12-15 02:40:46 +08:00
|
|
|
|
2018-07-20 17:01:48 +08:00
|
|
|
#ifdef CONFIG_XEN_GNTDEV_DMABUF
|
2019-02-14 22:23:20 +08:00
|
|
|
priv->dmabuf_priv = gntdev_dmabuf_init(flip);
|
2018-07-20 17:01:48 +08:00
|
|
|
if (IS_ERR(priv->dmabuf_priv)) {
|
2019-11-11 20:20:09 +08:00
|
|
|
int ret = PTR_ERR(priv->dmabuf_priv);
|
2018-07-20 17:01:48 +08:00
|
|
|
|
2010-12-15 02:40:46 +08:00
|
|
|
kfree(priv);
|
|
|
|
return ret;
|
|
|
|
}
|
2019-11-11 20:20:09 +08:00
|
|
|
#endif
|
2010-12-15 02:40:46 +08:00
|
|
|
|
|
|
|
flip->private_data = priv;
|
2018-07-20 17:01:46 +08:00
|
|
|
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
|
|
|
|
priv->dma_dev = gntdev_miscdev.this_device;
|
2019-10-09 03:41:55 +08:00
|
|
|
dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64));
|
2018-07-20 17:01:46 +08:00
|
|
|
#endif
|
2010-12-15 02:40:46 +08:00
|
|
|
pr_debug("priv %p\n", priv);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int gntdev_release(struct inode *inode, struct file *flip)
|
|
|
|
{
|
|
|
|
struct gntdev_priv *priv = flip->private_data;
|
2018-07-20 17:01:47 +08:00
|
|
|
struct gntdev_grant_map *map;
|
2010-12-15 02:40:46 +08:00
|
|
|
|
|
|
|
pr_debug("priv %p\n", priv);
|
|
|
|
|
2015-06-26 09:28:24 +08:00
|
|
|
mutex_lock(&priv->lock);
|
2010-12-15 02:40:46 +08:00
|
|
|
while (!list_empty(&priv->maps)) {
|
2018-07-20 17:01:47 +08:00
|
|
|
map = list_entry(priv->maps.next,
|
|
|
|
struct gntdev_grant_map, next);
|
2011-02-04 01:19:01 +08:00
|
|
|
list_del(&map->next);
|
2013-01-03 06:57:12 +08:00
|
|
|
gntdev_put_map(NULL /* already removed */, map);
|
2010-12-15 02:40:46 +08:00
|
|
|
}
|
2015-06-26 09:28:24 +08:00
|
|
|
mutex_unlock(&priv->lock);
|
2010-12-15 02:40:46 +08:00
|
|
|
|
2018-07-20 17:01:48 +08:00
|
|
|
#ifdef CONFIG_XEN_GNTDEV_DMABUF
|
|
|
|
gntdev_dmabuf_fini(priv->dmabuf_priv);
|
|
|
|
#endif
|
|
|
|
|
2010-12-15 02:40:46 +08:00
|
|
|
kfree(priv);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
|
|
|
|
struct ioctl_gntdev_map_grant_ref __user *u)
|
|
|
|
{
|
|
|
|
struct ioctl_gntdev_map_grant_ref op;
|
2018-07-20 17:01:47 +08:00
|
|
|
struct gntdev_grant_map *map;
|
2010-12-15 02:40:46 +08:00
|
|
|
int err;
|
|
|
|
|
|
|
|
if (copy_from_user(&op, u, sizeof(op)) != 0)
|
|
|
|
return -EFAULT;
|
|
|
|
pr_debug("priv %p, add %d\n", priv, op.count);
|
2019-11-07 19:15:45 +08:00
|
|
|
if (unlikely(gntdev_test_page_count(op.count)))
|
2010-12-15 02:40:46 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
err = -ENOMEM;
|
2018-07-20 17:01:46 +08:00
|
|
|
map = gntdev_alloc_map(priv, op.count, 0 /* This is not a dma-buf. */);
|
2010-12-15 02:40:46 +08:00
|
|
|
if (!map)
|
|
|
|
return err;
|
2011-02-04 01:18:59 +08:00
|
|
|
|
2011-02-04 01:19:01 +08:00
|
|
|
if (copy_from_user(map->grants, &u->refs,
|
|
|
|
sizeof(map->grants[0]) * op.count) != 0) {
|
2013-01-03 06:57:12 +08:00
|
|
|
gntdev_put_map(NULL, map);
|
|
|
|
return -EFAULT;
|
2011-02-04 01:18:59 +08:00
|
|
|
}
|
|
|
|
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_lock(&priv->lock);
|
2010-12-15 02:40:46 +08:00
|
|
|
gntdev_add_map(priv, map);
|
|
|
|
op.index = map->index << PAGE_SHIFT;
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_unlock(&priv->lock);
|
2010-12-15 02:40:46 +08:00
|
|
|
|
2011-02-04 01:19:01 +08:00
|
|
|
if (copy_to_user(u, &op, sizeof(op)) != 0)
|
|
|
|
return -EFAULT;
|
|
|
|
|
2010-12-15 02:40:46 +08:00
|
|
|
return 0;
|
|
|
|
}
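For context, a minimal user-space sketch of driving this ioctl, assuming the exported UAPI header xen/gntdev.h and the usual /dev/xen/gntdev node; the domid and grant reference below are made-up values and error checking is omitted:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <xen/gntdev.h>

	int fd = open("/dev/xen/gntdev", O_RDWR);
	struct ioctl_gntdev_map_grant_ref op = {
		.count   = 1,
		.refs[0] = { .domid = 1, .ref = 42 },   /* hypothetical grant */
	};
	ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &op);     /* fills in op.index */
	/* op.index is the mmap offset for this mapping: */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
		       fd, op.index);                   /* enters gntdev_mmap() */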
|
|
|
|
|
|
|
|
static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
|
|
|
|
struct ioctl_gntdev_unmap_grant_ref __user *u)
|
|
|
|
{
|
|
|
|
struct ioctl_gntdev_unmap_grant_ref op;
|
2018-07-20 17:01:47 +08:00
|
|
|
struct gntdev_grant_map *map;
|
2010-12-15 02:40:46 +08:00
|
|
|
int err = -ENOENT;
|
|
|
|
|
|
|
|
if (copy_from_user(&op, u, sizeof(op)) != 0)
|
|
|
|
return -EFAULT;
|
|
|
|
pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count);
|
|
|
|
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_lock(&priv->lock);
|
2010-12-15 02:40:46 +08:00
|
|
|
map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
|
2011-02-04 01:19:01 +08:00
|
|
|
if (map) {
|
|
|
|
list_del(&map->next);
|
|
|
|
err = 0;
|
|
|
|
}
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_unlock(&priv->lock);
|
2011-10-12 03:16:06 +08:00
|
|
|
if (map)
|
2013-01-03 06:57:12 +08:00
|
|
|
gntdev_put_map(priv, map);
|
2010-12-15 02:40:46 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
|
|
|
|
struct ioctl_gntdev_get_offset_for_vaddr __user *u)
|
|
|
|
{
|
|
|
|
struct ioctl_gntdev_get_offset_for_vaddr op;
|
2011-02-04 01:19:00 +08:00
|
|
|
struct vm_area_struct *vma;
|
2018-07-20 17:01:47 +08:00
|
|
|
struct gntdev_grant_map *map;
|
2013-01-03 06:57:11 +08:00
|
|
|
int rv = -EINVAL;
|
2010-12-15 02:40:46 +08:00
|
|
|
|
|
|
|
if (copy_from_user(&op, u, sizeof(op)) != 0)
|
|
|
|
return -EFAULT;
|
|
|
|
pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
|
|
|
|
|
2020-06-09 12:33:25 +08:00
|
|
|
mmap_read_lock(current->mm);
|
2011-02-04 01:19:00 +08:00
|
|
|
vma = find_vma(current->mm, op.vaddr);
|
|
|
|
if (!vma || vma->vm_ops != &gntdev_vmops)
|
2013-01-03 06:57:11 +08:00
|
|
|
goto out_unlock;
|
2011-02-04 01:19:00 +08:00
|
|
|
|
|
|
|
map = vma->vm_private_data;
|
|
|
|
if (!map)
|
2013-01-03 06:57:11 +08:00
|
|
|
goto out_unlock;
|
2011-02-04 01:19:00 +08:00
|
|
|
|
2010-12-15 02:40:46 +08:00
|
|
|
op.offset = map->index << PAGE_SHIFT;
|
|
|
|
op.count = map->count;
|
2013-01-03 06:57:11 +08:00
|
|
|
rv = 0;
|
2010-12-15 02:40:46 +08:00
|
|
|
|
2013-01-03 06:57:11 +08:00
|
|
|
out_unlock:
|
2020-06-09 12:33:25 +08:00
|
|
|
mmap_read_unlock(current->mm);
|
2013-01-03 06:57:11 +08:00
|
|
|
|
|
|
|
if (rv == 0 && copy_to_user(u, &op, sizeof(op)) != 0)
|
2010-12-15 02:40:46 +08:00
|
|
|
return -EFAULT;
|
2013-01-03 06:57:11 +08:00
|
|
|
return rv;
|
2010-12-15 02:40:46 +08:00
|
|
|
}
|
|
|
|
|
2011-02-04 01:19:04 +08:00
|
|
|
static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
|
|
|
|
{
|
|
|
|
struct ioctl_gntdev_unmap_notify op;
|
2018-07-20 17:01:47 +08:00
|
|
|
struct gntdev_grant_map *map;
|
2011-02-04 01:19:04 +08:00
|
|
|
int rc;
|
2011-10-28 05:58:49 +08:00
|
|
|
int out_flags;
|
2020-03-24 00:15:11 +08:00
|
|
|
evtchn_port_t out_event;
|
2011-02-04 01:19:04 +08:00
|
|
|
|
|
|
|
if (copy_from_user(&op, u, sizeof(op)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2011-10-28 05:58:49 +08:00
|
|
|
/* We need to grab a reference to the event channel we are going to use
|
|
|
|
* to send the notify before releasing the reference we may already have
|
|
|
|
* (if someone has called this ioctl twice). This is required so that
|
|
|
|
* it is possible to change the clear_byte part of the notification
|
|
|
|
* without disturbing the event channel part, which may now be the last
|
|
|
|
* reference to that event channel.
|
|
|
|
*/
|
|
|
|
if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
|
|
|
|
if (evtchn_get(op.event_channel_port))
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
out_flags = op.action;
|
|
|
|
out_event = op.event_channel_port;
|
|
|
|
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_lock(&priv->lock);
|
2011-02-04 01:19:04 +08:00
|
|
|
|
|
|
|
list_for_each_entry(map, &priv->maps, next) {
|
|
|
|
uint64_t begin = map->index << PAGE_SHIFT;
|
|
|
|
uint64_t end = (map->index + map->count) << PAGE_SHIFT;
|
|
|
|
if (op.index >= begin && op.index < end)
|
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
rc = -ENOENT;
|
|
|
|
goto unlock_out;
|
|
|
|
|
|
|
|
found:
|
2011-02-10 07:15:50 +08:00
|
|
|
if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
|
|
|
|
(map->flags & GNTMAP_readonly)) {
|
|
|
|
rc = -EINVAL;
|
|
|
|
goto unlock_out;
|
|
|
|
}
|
|
|
|
|
2011-10-28 05:58:49 +08:00
|
|
|
out_flags = map->notify.flags;
|
|
|
|
out_event = map->notify.event;
|
|
|
|
|
2011-02-04 01:19:04 +08:00
|
|
|
map->notify.flags = op.action;
|
|
|
|
map->notify.addr = op.index - (map->index << PAGE_SHIFT);
|
|
|
|
map->notify.event = op.event_channel_port;
|
2011-10-28 05:58:49 +08:00
|
|
|
|
2011-02-04 01:19:04 +08:00
|
|
|
rc = 0;
|
2011-10-28 05:58:49 +08:00
|
|
|
|
2011-02-04 01:19:04 +08:00
|
|
|
unlock_out:
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_unlock(&priv->lock);
|
2011-10-28 05:58:49 +08:00
|
|
|
|
|
|
|
/* Drop the reference to the event channel we did not save in the map */
|
|
|
|
if (out_flags & UNMAP_NOTIFY_SEND_EVENT)
|
|
|
|
evtchn_put(out_event);
|
|
|
|
|
2011-02-04 01:19:04 +08:00
|
|
|
return rc;
|
|
|
|
}
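Continuing the user-space sketch from the map ioctl above (reusing its fd and op), the unmap notification can be requested like this; picking the last byte of the mapped page is a common convention, but any byte inside the mapping works:

	struct ioctl_gntdev_unmap_notify notify = {
		.index  = op.index + 4096 - 1,      /* byte offset within the fd */
		.action = UNMAP_NOTIFY_CLEAR_BYTE,  /* clear it on final unmap   */
	};
	ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);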
|
|
|
|
|
2016-05-09 17:59:48 +08:00
|
|
|
#define GNTDEV_COPY_BATCH 16
|
2014-12-03 00:13:26 +08:00
|
|
|
|
|
|
|
struct gntdev_copy_batch {
|
|
|
|
struct gnttab_copy ops[GNTDEV_COPY_BATCH];
|
|
|
|
struct page *pages[GNTDEV_COPY_BATCH];
|
|
|
|
s16 __user *status[GNTDEV_COPY_BATCH];
|
|
|
|
unsigned int nr_ops;
|
|
|
|
unsigned int nr_pages;
|
2020-09-06 14:51:53 +08:00
|
|
|
bool writeable;
|
2014-12-03 00:13:26 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
|
2020-09-06 14:51:53 +08:00
|
|
|
unsigned long *gfn)
|
2014-12-03 00:13:26 +08:00
|
|
|
{
|
|
|
|
unsigned long addr = (unsigned long)virt;
|
|
|
|
struct page *page;
|
|
|
|
unsigned long xen_pfn;
|
|
|
|
int ret;
|
|
|
|
|
2020-09-06 14:51:54 +08:00
|
|
|
ret = pin_user_pages_fast(addr, 1, batch->writeable ? FOLL_WRITE : 0, &page);
|
2014-12-03 00:13:26 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
batch->pages[batch->nr_pages++] = page;
|
|
|
|
|
|
|
|
xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(addr & ~PAGE_MASK);
|
|
|
|
*gfn = pfn_to_gfn(xen_pfn);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
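One detail worth calling out in gntdev_get_page(): on kernels whose page size is larger than Xen's 4 KiB granularity (e.g. arm64 with 64 KiB pages), a single pinned page spans several Xen frames, and the sub-page offset selects the right one. A worked example with a made-up address:

	/* Assume PAGE_SIZE == 64 KiB and addr == 0x15000. Then:
	 *   addr & ~PAGE_MASK    == 0x5000  (offset inside the kernel page)
	 *   XEN_PFN_DOWN(0x5000) == 5       (0x5000 >> 12, the sixth 4 KiB frame)
	 *   xen_pfn = page_to_xen_pfn(page) + 5
	 * so *gfn names exactly the 4 KiB Xen frame containing addr.
	 */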
|
|
|
|
|
|
|
|
static void gntdev_put_pages(struct gntdev_copy_batch *batch)
|
|
|
|
{
|
2020-09-06 14:51:54 +08:00
|
|
|
unpin_user_pages_dirty_lock(batch->pages, batch->nr_pages, batch->writeable);
|
2014-12-03 00:13:26 +08:00
|
|
|
batch->nr_pages = 0;
|
2020-09-06 14:51:53 +08:00
|
|
|
batch->writeable = false;
|
2014-12-03 00:13:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int gntdev_copy(struct gntdev_copy_batch *batch)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
gnttab_batch_copy(batch->ops, batch->nr_ops);
|
|
|
|
gntdev_put_pages(batch);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For each completed op, update the status if the op failed
|
|
|
|
* and all previous ops for the segment were successful.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < batch->nr_ops; i++) {
|
|
|
|
s16 status = batch->ops[i].status;
|
|
|
|
s16 old_status;
|
|
|
|
|
|
|
|
if (status == GNTST_okay)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (__get_user(old_status, batch->status[i]))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (old_status != GNTST_okay)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (__put_user(status, batch->status[i]))
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
batch->nr_ops = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
|
|
|
|
struct gntdev_grant_copy_segment *seg,
|
|
|
|
s16 __user *status)
|
|
|
|
{
|
|
|
|
uint16_t copied = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Disallow local -> local copies since there is only space in
|
|
|
|
* batch->pages for one page per-op and this would be a very
|
|
|
|
* expensive memcpy().
|
|
|
|
*/
|
|
|
|
if (!(seg->flags & (GNTCOPY_source_gref | GNTCOPY_dest_gref)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Can't cross page if source/dest is a grant ref. */
|
|
|
|
if (seg->flags & GNTCOPY_source_gref) {
|
|
|
|
if (seg->source.foreign.offset + seg->len > XEN_PAGE_SIZE)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (seg->flags & GNTCOPY_dest_gref) {
|
|
|
|
if (seg->dest.foreign.offset + seg->len > XEN_PAGE_SIZE)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (put_user(GNTST_okay, status))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
while (copied < seg->len) {
|
|
|
|
struct gnttab_copy *op;
|
|
|
|
void __user *virt;
|
|
|
|
size_t len, off;
|
|
|
|
unsigned long gfn;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (batch->nr_ops >= GNTDEV_COPY_BATCH) {
|
|
|
|
ret = gntdev_copy(batch);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
len = seg->len - copied;
|
|
|
|
|
|
|
|
op = &batch->ops[batch->nr_ops];
|
|
|
|
op->flags = 0;
|
|
|
|
|
|
|
|
if (seg->flags & GNTCOPY_source_gref) {
|
|
|
|
op->source.u.ref = seg->source.foreign.ref;
|
|
|
|
op->source.domid = seg->source.foreign.domid;
|
|
|
|
op->source.offset = seg->source.foreign.offset + copied;
|
|
|
|
op->flags |= GNTCOPY_source_gref;
|
|
|
|
} else {
|
|
|
|
virt = seg->source.virt + copied;
|
|
|
|
off = (unsigned long)virt & ~XEN_PAGE_MASK;
|
|
|
|
len = min(len, (size_t)XEN_PAGE_SIZE - off);
|
2020-09-06 14:51:53 +08:00
|
|
|
batch->writeable = false;
|
2014-12-03 00:13:26 +08:00
|
|
|
|
2020-09-06 14:51:53 +08:00
|
|
|
ret = gntdev_get_page(batch, virt, &gfn);
|
2014-12-03 00:13:26 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
op->source.u.gmfn = gfn;
|
|
|
|
op->source.domid = DOMID_SELF;
|
|
|
|
op->source.offset = off;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (seg->flags & GNTCOPY_dest_gref) {
|
|
|
|
op->dest.u.ref = seg->dest.foreign.ref;
|
|
|
|
op->dest.domid = seg->dest.foreign.domid;
|
|
|
|
op->dest.offset = seg->dest.foreign.offset + copied;
|
|
|
|
op->flags |= GNTCOPY_dest_gref;
|
|
|
|
} else {
|
|
|
|
virt = seg->dest.virt + copied;
|
|
|
|
off = (unsigned long)virt & ~XEN_PAGE_MASK;
|
|
|
|
len = min(len, (size_t)XEN_PAGE_SIZE - off);
|
2020-09-06 14:51:53 +08:00
|
|
|
batch->writeable = true;
|
2014-12-03 00:13:26 +08:00
|
|
|
|
2020-09-06 14:51:53 +08:00
|
|
|
ret = gntdev_get_page(batch, virt, &gfn);
|
2014-12-03 00:13:26 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
op->dest.u.gmfn = gfn;
|
|
|
|
op->dest.domid = DOMID_SELF;
|
|
|
|
op->dest.offset = off;
|
|
|
|
}
|
|
|
|
|
|
|
|
op->len = len;
|
|
|
|
copied += len;
|
|
|
|
|
|
|
|
batch->status[batch->nr_ops] = status;
|
|
|
|
batch->nr_ops++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u)
|
|
|
|
{
|
|
|
|
struct ioctl_gntdev_grant_copy copy;
|
|
|
|
struct gntdev_copy_batch batch;
|
|
|
|
unsigned int i;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (copy_from_user(&copy, u, sizeof(copy)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
batch.nr_ops = 0;
|
|
|
|
batch.nr_pages = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < copy.count; i++) {
|
|
|
|
struct gntdev_grant_copy_segment seg;
|
|
|
|
|
|
|
|
if (copy_from_user(&seg, &copy.segments[i], sizeof(seg))) {
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = gntdev_grant_copy_seg(&batch, &seg, &copy.segments[i].status);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
cond_resched();
|
|
|
|
}
|
|
|
|
if (batch.nr_ops)
|
|
|
|
ret = gntdev_copy(&batch);
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
out:
|
|
|
|
gntdev_put_pages(&batch);
|
|
|
|
return ret;
|
|
|
|
}
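A corresponding user-space sketch for the copy path, reusing the fd from the earlier sketch and assuming the UAPI segment layout, with the GNTCOPY_* flag values taken from Xen's public grant_table.h; ref and domid are made up and error checking is omitted:

	char buf[4096];
	struct gntdev_grant_copy_segment seg = {
		.source.foreign = { .ref = 42, .offset = 0, .domid = 1 },
		.dest.virt      = buf,
		.len            = 4096,
		.flags          = GNTCOPY_source_gref,  /* foreign -> local */
	};
	struct ioctl_gntdev_grant_copy copy = { .count = 1, .segments = &seg };
	ioctl(fd, IOCTL_GNTDEV_GRANT_COPY, &copy);
	/* On success, seg.status is GNTST_okay. */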
|
|
|
|
|
2010-12-15 02:40:46 +08:00
|
|
|
static long gntdev_ioctl(struct file *flip,
|
|
|
|
unsigned int cmd, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct gntdev_priv *priv = flip->private_data;
|
|
|
|
void __user *ptr = (void __user *)arg;
|
|
|
|
|
|
|
|
switch (cmd) {
|
|
|
|
case IOCTL_GNTDEV_MAP_GRANT_REF:
|
|
|
|
return gntdev_ioctl_map_grant_ref(priv, ptr);
|
|
|
|
|
|
|
|
case IOCTL_GNTDEV_UNMAP_GRANT_REF:
|
|
|
|
return gntdev_ioctl_unmap_grant_ref(priv, ptr);
|
|
|
|
|
|
|
|
case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
|
|
|
|
return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
|
|
|
|
|
2011-02-04 01:19:04 +08:00
|
|
|
case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
|
|
|
|
return gntdev_ioctl_notify(priv, ptr);
|
|
|
|
|
2014-12-03 00:13:26 +08:00
|
|
|
case IOCTL_GNTDEV_GRANT_COPY:
|
|
|
|
return gntdev_ioctl_grant_copy(priv, ptr);
|
|
|
|
|
2018-07-20 17:01:48 +08:00
|
|
|
#ifdef CONFIG_XEN_GNTDEV_DMABUF
|
|
|
|
case IOCTL_GNTDEV_DMABUF_EXP_FROM_REFS:
|
|
|
|
return gntdev_ioctl_dmabuf_exp_from_refs(priv, use_ptemod, ptr);
|
|
|
|
|
|
|
|
case IOCTL_GNTDEV_DMABUF_EXP_WAIT_RELEASED:
|
|
|
|
return gntdev_ioctl_dmabuf_exp_wait_released(priv, ptr);
|
|
|
|
|
|
|
|
case IOCTL_GNTDEV_DMABUF_IMP_TO_REFS:
|
|
|
|
return gntdev_ioctl_dmabuf_imp_to_refs(priv, ptr);
|
|
|
|
|
|
|
|
case IOCTL_GNTDEV_DMABUF_IMP_RELEASE:
|
|
|
|
return gntdev_ioctl_dmabuf_imp_release(priv, ptr);
|
|
|
|
#endif
|
|
|
|
|
2010-12-15 02:40:46 +08:00
|
|
|
default:
|
|
|
|
pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
|
|
|
|
return -ENOIOCTLCMD;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
struct gntdev_priv *priv = flip->private_data;
|
|
|
|
int index = vma->vm_pgoff;
|
2016-05-24 08:04:32 +08:00
|
|
|
int count = vma_pages(vma);
|
2018-07-20 17:01:47 +08:00
|
|
|
struct gntdev_grant_map *map;
|
2019-05-14 08:22:23 +08:00
|
|
|
int err = -EINVAL;
|
2010-12-15 02:40:46 +08:00
|
|
|
|
|
|
|
if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
|
2022-10-03 06:20:06 +08:00
|
|
|
index, count, vma->vm_start, vma->vm_pgoff);
|
2010-12-15 02:40:46 +08:00
|
|
|
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_lock(&priv->lock);
|
2010-12-15 02:40:46 +08:00
|
|
|
map = gntdev_find_map_index(priv, index, count);
|
|
|
|
if (!map)
|
|
|
|
goto unlock_out;
|
2022-10-03 06:20:06 +08:00
|
|
|
if (!atomic_add_unless(&map->in_use, 1, 1))
|
2022-06-22 10:27:26 +08:00
|
|
|
goto unlock_out;
|
2022-10-03 06:20:06 +08:00
|
|
|
|
2017-03-06 22:21:16 +08:00
|
|
|
refcount_inc(&map->users);
|
2011-02-04 01:19:01 +08:00
|
|
|
|
2010-12-15 02:40:46 +08:00
|
|
|
vma->vm_ops = &gntdev_vmops;
|
|
|
|
|
2023-01-27 03:37:49 +08:00
|
|
|
vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP);
|
2011-03-08 04:18:57 +08:00
|
|
|
|
|
|
|
if (use_ptemod)
|
2023-01-27 03:37:49 +08:00
|
|
|
vm_flags_set(vma, VM_DONTCOPY);
|
2010-12-15 02:40:46 +08:00
|
|
|
|
|
|
|
vma->vm_private_data = map;
|
2011-02-10 05:11:32 +08:00
|
|
|
if (map->flags) {
|
|
|
|
if ((vma->vm_flags & VM_WRITE) &&
|
|
|
|
(map->flags & GNTMAP_readonly))
|
2011-03-19 13:45:43 +08:00
|
|
|
goto out_unlock_put;
|
2011-02-10 05:11:32 +08:00
|
|
|
} else {
|
|
|
|
map->flags = GNTMAP_host_map;
|
|
|
|
if (!(vma->vm_flags & VM_WRITE))
|
|
|
|
map->flags |= GNTMAP_readonly;
|
|
|
|
}
|
2010-12-15 02:40:46 +08:00
|
|
|
|
2022-10-03 06:20:06 +08:00
|
|
|
map->pages_vm_start = vma->vm_start;
|
|
|
|
|
2019-11-13 04:22:31 +08:00
|
|
|
if (use_ptemod) {
|
|
|
|
err = mmu_interval_notifier_insert_locked(
|
|
|
|
&map->notifier, vma->vm_mm, vma->vm_start,
|
|
|
|
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
|
2022-10-03 06:20:06 +08:00
|
|
|
if (err)
|
2019-11-13 04:22:31 +08:00
|
|
|
goto out_unlock_put;
|
2022-10-03 06:20:06 +08:00
|
|
|
|
|
|
|
map->notifier_init = true;
|
2019-11-13 04:22:31 +08:00
|
|
|
}
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_unlock(&priv->lock);
|
2011-01-07 19:51:47 +08:00
|
|
|
|
2011-02-04 01:19:02 +08:00
|
|
|
if (use_ptemod) {
|
2020-01-28 23:31:26 +08:00
|
|
|
/*
|
|
|
|
* gntdev takes the address of the PTE in find_grant_ptes() and
|
|
|
|
* passes it to the hypervisor in gntdev_map_grant_pages(). The
|
|
|
|
* purpose of the notifier is to prevent the hypervisor pointer
|
|
|
|
* to the PTE from going stale.
|
|
|
|
*
|
|
|
|
* Since this vma's mappings can't be touched without the
|
2020-06-09 12:33:54 +08:00
|
|
|
* mmap_lock, and we are holding it now, there is no need for
|
2020-01-28 23:31:26 +08:00
|
|
|
* the notifier_range locking pattern.
|
|
|
|
*/
|
|
|
|
mmu_interval_read_begin(&map->notifier);
|
|
|
|
|
2011-02-04 01:19:02 +08:00
|
|
|
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
|
|
|
|
vma->vm_end - vma->vm_start,
|
|
|
|
find_grant_ptes, map);
|
|
|
|
if (err) {
|
2013-06-28 18:21:41 +08:00
|
|
|
pr_warn("find_grant_ptes() failure.\n");
|
2011-02-04 03:16:54 +08:00
|
|
|
goto out_put_map;
|
2011-02-04 01:19:02 +08:00
|
|
|
}
|
2010-12-15 02:40:46 +08:00
|
|
|
}
|
|
|
|
|
2018-07-20 17:01:47 +08:00
|
|
|
err = gntdev_map_grant_pages(map);
|
2011-02-04 03:16:54 +08:00
|
|
|
if (err)
|
|
|
|
goto out_put_map;
|
2011-01-07 19:51:47 +08:00
|
|
|
|
2011-02-04 01:19:02 +08:00
|
|
|
if (!use_ptemod) {
|
xen/gntdev.c: Replace vm_map_pages() with vm_map_pages_zero()
Commit df9bde015a72 ("xen/gntdev.c: convert to use vm_map_pages()")
breaks the gntdev driver. If vma->vm_pgoff > 0, vm_map_pages()
will:
- use map->pages starting at index vma->vm_pgoff instead of 0, and
- verify map->count against vma_pages() + vma->vm_pgoff instead of just
vma_pages().
In practice, this breaks using a single gntdev FD for mapping multiple
grants.
relevant strace output:
[pid 857] ioctl(7, IOCTL_GNTDEV_MAP_GRANT_REF, 0x7ffd3407b6d0) = 0
[pid 857] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, 7, 0) =
0x777f1211b000
[pid 857] ioctl(7, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, 0x7ffd3407b710) = 0
[pid 857] ioctl(7, IOCTL_GNTDEV_MAP_GRANT_REF, 0x7ffd3407b6d0) = 0
[pid 857] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, 7,
0x1000) = -1 ENXIO (No such device or address)
details here:
https://github.com/QubesOS/qubes-issues/issues/5199
The reason (quoting Marek's words from the discussion):
vma->vm_pgoff is used as the index passed to gntdev_find_map_index();
the parameter effectively selects which grant reference to map. The
map struct returned by gntdev_find_map_index() describes just the pages
to be mapped. Specifically, map->pages[0] should be mapped at
vma->vm_start, not at vma->vm_start + vma->vm_pgoff * PAGE_SIZE.
When trying to map a grant with index (aka vma->vm_pgoff) > 1,
__vm_map_pages() will refuse to map it, because it expects map->count
to be at least vma_pages(vma) + vma->vm_pgoff, while it is exactly
vma_pages(vma).
Converting the call from vm_map_pages() to vm_map_pages_zero() fixes the
problem, because the latter always maps map->pages[0] at vma->vm_start
(a sketch follows this commit message).
Marek has tested the change and confirmed the fix.
Cc: stable@vger.kernel.org # v5.2+
Fixes: df9bde015a72 ("xen/gntdev.c: convert to use vm_map_pages()")
Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
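To see why the zero-offset variant matters here, consider this sketch,
modeled on (but not copied verbatim from) __vm_map_pages() in
mm/memory.c. vm_map_pages() invokes the internal helper with
pgoff = vma->vm_pgoff, while vm_map_pages_zero() uses pgoff = 0:

static int __vm_map_pages_sketch(struct vm_area_struct *vma,
				 struct page **pages, unsigned long num,
				 unsigned long pgoff)
{
	unsigned long count = vma_pages(vma);
	unsigned long uaddr = vma->vm_start;
	int ret, i;

	/* For the second gntdev grant in the strace above: num == 1,
	 * count == 1, and pgoff == 1 when taken from vma->vm_pgoff,
	 * so this check fails with -ENXIO. With pgoff forced to 0,
	 * it passes. */
	if (pgoff >= num || count > num - pgoff)
		return -ENXIO;

	for (i = 0; i < count; i++) {
		ret = vm_insert_page(vma, uaddr, pages[pgoff + i]);
		if (ret < 0)
			return ret;
		uaddr += PAGE_SIZE;
	}
	return 0;
}

Because gntdev already uses vm_pgoff to pick which map to install,
pages[0] must always land at vm_start; forcing the intra-array offset to
zero restores that invariant.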
2019-07-31 02:34:56 +08:00
|
|
|
err = vm_map_pages_zero(vma, map->pages, map->count);
|
2019-05-14 08:22:23 +08:00
|
|
|
if (err)
|
|
|
|
goto out_put_map;
|
2011-02-04 01:19:02 +08:00
|
|
|
}
|
|
|
|
|
2011-01-07 19:51:47 +08:00
|
|
|
return 0;
|
|
|
|
|
2010-12-15 02:40:46 +08:00
|
|
|
unlock_out:
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_unlock(&priv->lock);
|
2010-12-15 02:40:46 +08:00
|
|
|
return err;
|
2011-02-04 03:16:54 +08:00
|
|
|
|
2011-03-19 13:45:43 +08:00
|
|
|
out_unlock_put:
|
2015-01-10 02:06:12 +08:00
|
|
|
mutex_unlock(&priv->lock);
|
2011-02-04 03:16:54 +08:00
|
|
|
out_put_map:
|
xen/gntdev: Accommodate VMA splitting
(Same commit as annotated above; full message quoted earlier.)
2022-10-03 06:20:06 +08:00
|
|
|
if (use_ptemod)
|
2018-01-09 20:10:22 +08:00
|
|
|
unmap_grant_pages(map, 0, map->count);
|
2013-01-03 06:57:12 +08:00
|
|
|
gntdev_put_map(priv, map);
|
2011-02-04 03:16:54 +08:00
|
|
|
return err;
|
2010-12-15 02:40:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static const struct file_operations gntdev_fops = {
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.open = gntdev_open,
|
|
|
|
.release = gntdev_release,
|
|
|
|
.mmap = gntdev_mmap,
|
|
|
|
.unlocked_ioctl = gntdev_ioctl
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct miscdevice gntdev_miscdev = {
|
|
|
|
.minor = MISC_DYNAMIC_MINOR,
|
|
|
|
.name = "xen/gntdev",
|
|
|
|
.fops = &gntdev_fops,
|
|
|
|
};
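For orientation, a hedged userspace sketch of how this misc device is
typically driven: open the node registered above, reserve an index with
IOCTL_GNTDEV_MAP_GRANT_REF, then mmap() at that index, matching the
strace excerpt quoted earlier. Error handling is abbreviated and the
header install path may differ per distribution:

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <xen/gntdev.h>	/* UAPI header; install path may vary */

/* Map one page granted by domain 'domid' under grant reference 'ref'.
 * The caller is assumed to have done:
 *	fd = open("/dev/xen/gntdev", O_RDWR);
 */
static void *map_one_grant(int fd, uint32_t domid, uint32_t ref)
{
	struct ioctl_gntdev_map_grant_ref op;

	memset(&op, 0, sizeof(op));
	op.count = 1;
	op.refs[0].domid = domid;
	op.refs[0].ref = ref;
	if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &op) < 0)
		return MAP_FAILED;

	/* op.index is a pseudo-offset; gntdev_mmap() turns the
	 * resulting vma->vm_pgoff back into this mapping. */
	return mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
		    fd, (off_t)op.index);
}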
|
|
|
|
|
|
|
|
/* ------------------------------------------------------------------ */
|
|
|
|
|
|
|
|
static int __init gntdev_init(void)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!xen_domain())
|
|
|
|
return -ENODEV;
|
|
|
|
|
2014-01-03 23:20:18 +08:00
|
|
|
use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);
|
2011-02-04 01:19:02 +08:00
|
|
|
|
2010-12-15 02:40:46 +08:00
|
|
|
err = misc_register(&gntdev_miscdev);
|
|
|
|
if (err != 0) {
|
2013-06-28 18:21:41 +08:00
|
|
|
pr_err("Could not register gntdev device\n");
|
2010-12-15 02:40:46 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void __exit gntdev_exit(void)
|
|
|
|
{
|
|
|
|
misc_deregister(&gntdev_miscdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
module_init(gntdev_init);
|
|
|
|
module_exit(gntdev_exit);
|
|
|
|
|
|
|
|
/* ------------------------------------------------------------------ */
|