2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-17 17:53:56 +08:00

virtio-mem: don't special-case ZONE_MOVABLE

When introducing virtio-mem, the semantics of ZONE_MOVABLE were rather
unclear, which is why we special-cased ZONE_MOVABLE such that partially
plugged blocks would never end up in ZONE_MOVABLE.

Now that the semantics are much clearer (and will be documented in a
follow-up patch, including the new virtio-mem behavior), let's allow
onlining partially plugged memory blocks to ZONE_MOVABLE and also consider
memory blocks that were onlined to ZONE_MOVABLE when unplugging memory.
While unplugged memory pages are, in general, unmovable, they can be
skipped when offlining memory.

virtio-mem only unplugs fairly big chunks (in the megabyte range) and
tries to shrink the memory region rather than choosing memory at random.
In theory, if all other pages in the movable zone were movable,
virtio-mem would only shrink that zone and not create any kind of
fragmentation.

In the future, we might want to remember the zone again and use the
information when (un)plugging memory.  For now, let's keep it simple.

Note: Support for defragmentation is planned, to deal with fragmentation
after unplug caused by memory chunks within memory blocks that could not be
unplugged earlier (e.g., because somebody pinned pages within ZONE_MOVABLE
for a long time).

Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Qian Cai <cai@lca.pw>
Link: http://lkml.kernel.org/r/20200816125333.7434-6-david@redhat.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
David Hildenbrand 2020-10-13 16:55:31 -07:00 committed by Linus Torvalds
parent 1c31cb493c
commit 27f852795a

View File

@ -36,18 +36,10 @@ enum virtio_mem_mb_state {
VIRTIO_MEM_MB_STATE_OFFLINE,
/* Partially plugged, fully added to Linux, offline. */
VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL,
/* Fully plugged, fully added to Linux, online (!ZONE_MOVABLE). */
/* Fully plugged, fully added to Linux, online. */
VIRTIO_MEM_MB_STATE_ONLINE,
/* Partially plugged, fully added to Linux, online (!ZONE_MOVABLE). */
/* Partially plugged, fully added to Linux, online. */
VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL,
/*
* Fully plugged, fully added to Linux, online (ZONE_MOVABLE).
* We are not allowed to allocate (unplug) parts of this block that
* are not movable (similar to gigantic pages). We will never allow
* to online OFFLINE_PARTIAL to ZONE_MOVABLE (as they would contain
* unmovable parts).
*/
VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE,
VIRTIO_MEM_MB_STATE_COUNT
};
@ -526,21 +518,10 @@ static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id)
}
static int virtio_mem_notify_going_online(struct virtio_mem *vm,
unsigned long mb_id,
enum zone_type zone)
unsigned long mb_id)
{
switch (virtio_mem_mb_get_state(vm, mb_id)) {
case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
/*
* We won't allow to online a partially plugged memory block
* to the MOVABLE zone - it would contain unmovable parts.
*/
if (zone == ZONE_MOVABLE) {
dev_warn_ratelimited(&vm->vdev->dev,
"memory block has holes, MOVABLE not supported\n");
return NOTIFY_BAD;
}
return NOTIFY_OK;
case VIRTIO_MEM_MB_STATE_OFFLINE:
return NOTIFY_OK;
default:
@ -560,7 +541,6 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm,
VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
break;
case VIRTIO_MEM_MB_STATE_ONLINE:
case VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE:
virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_OFFLINE);
break;
@ -579,24 +559,17 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm,
virtio_mem_retry(vm);
}
static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id,
enum zone_type zone)
static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id)
{
unsigned long nb_offline;
switch (virtio_mem_mb_get_state(vm, mb_id)) {
case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
BUG_ON(zone == ZONE_MOVABLE);
virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
break;
case VIRTIO_MEM_MB_STATE_OFFLINE:
if (zone == ZONE_MOVABLE)
virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE);
else
virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_ONLINE);
virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_ONLINE);
break;
default:
BUG();
@ -675,7 +648,6 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
const unsigned long start = PFN_PHYS(mhp->start_pfn);
const unsigned long size = PFN_PHYS(mhp->nr_pages);
const unsigned long mb_id = virtio_mem_phys_to_mb_id(start);
enum zone_type zone;
int rc = NOTIFY_OK;
if (!virtio_mem_overlaps_range(vm, start, size))
@ -717,8 +689,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
break;
}
vm->hotplug_active = true;
zone = page_zonenum(pfn_to_page(mhp->start_pfn));
rc = virtio_mem_notify_going_online(vm, mb_id, zone);
rc = virtio_mem_notify_going_online(vm, mb_id);
break;
case MEM_OFFLINE:
virtio_mem_notify_offline(vm, mb_id);
@ -726,8 +697,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
mutex_unlock(&vm->hotplug_mutex);
break;
case MEM_ONLINE:
zone = page_zonenum(pfn_to_page(mhp->start_pfn));
virtio_mem_notify_online(vm, mb_id, zone);
virtio_mem_notify_online(vm, mb_id);
vm->hotplug_active = false;
mutex_unlock(&vm->hotplug_mutex);
break;
@ -1906,8 +1876,7 @@ static void virtio_mem_remove(struct virtio_device *vdev)
if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE]) {
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL]) {
dev_warn(&vdev->dev, "device still has system memory added\n");
} else {
virtio_mem_delete_resource(vm);