linux/drivers/md/dm-cache-background-tracker.c

259 lines
5.3 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2017 Red Hat. All rights reserved.
*
* This file is released under the GPL.
*/
#include "dm-cache-background-tracker.h"
/*----------------------------------------------------------------*/
#define DM_MSG_PREFIX "dm-background-tracker"
/*
 * One unit of background work as tracked internally.  Callers only ever
 * see the embedded policy_work; the tracker recovers the enclosing
 * bt_work from it with container_of().
 */
struct bt_work {
	/* Membership of either the tracker's 'queued' or 'issued' list. */
	struct list_head list;

	/* Membership of the tracker's 'pending' rb-tree, keyed by work.oblock. */
	struct rb_node node;

	/* Copy of the work description handed to btracker_queue(). */
	struct policy_work work;
};
/*
 * Bookkeeping for all background work that has been queued and not yet
 * completed.
 */
struct background_tracker {
	/* New work is refused once the three counters below sum to this. */
	unsigned int max_work;

	/*
	 * Per-operation counts, bumped in btracker_queue() and dropped in
	 * btracker_complete().
	 */
	atomic_t pending_promotes;
	atomic_t pending_writebacks;
	atomic_t pending_demotes;

	/* Work that has been handed out to the caller. */
	struct list_head issued;

	/* Work waiting to be handed out by btracker_issue(). */
	struct list_head queued;

	/* Every tracked bt_work, indexed by origin block. */
	struct rb_root pending;

	/* Slab cache that bt_work objects are allocated from. */
	struct kmem_cache *work_cache;
};
/*
 * Allocate and initialise a background tracker.
 *
 * @max_work: limit on the total number of pending work items; stored and
 *            later checked before each new bt_work allocation.
 *
 * Returns the new tracker, or NULL (after logging an error) if either the
 * tracker itself or its bt_work slab cache could not be allocated.
 */
struct background_tracker *btracker_create(unsigned int max_work)
{
	struct background_tracker *tracker;

	tracker = kmalloc(sizeof(*tracker), GFP_KERNEL);
	if (!tracker) {
		DMERR("couldn't create background_tracker");
		return NULL;
	}

	tracker->max_work = max_work;

	atomic_set(&tracker->pending_promotes, 0);
	atomic_set(&tracker->pending_writebacks, 0);
	atomic_set(&tracker->pending_demotes, 0);

	INIT_LIST_HEAD(&tracker->issued);
	INIT_LIST_HEAD(&tracker->queued);
	tracker->pending = RB_ROOT;

	tracker->work_cache = KMEM_CACHE(bt_work, 0);
	if (!tracker->work_cache) {
		DMERR("couldn't create mempool for background work items");
		/* Nothing else has been allocated yet, so only the tracker goes. */
		kfree(tracker);
		return NULL;
	}

	return tracker;
}
EXPORT_SYMBOL_GPL(btracker_create);
void btracker_destroy(struct background_tracker *b)
{
dm cache: free background tracker's queued work in btracker_destroy Otherwise the kernel can BUG with: [ 2245.426978] ============================================================================= [ 2245.435155] BUG bt_work (Tainted: G B W ): Objects remaining in bt_work on __kmem_cache_shutdown() [ 2245.445233] ----------------------------------------------------------------------------- [ 2245.445233] [ 2245.454879] Slab 0x00000000b0ce2b30 objects=64 used=2 fp=0x000000000a3c6a4e flags=0x17ffffc0000200(slab|node=0|zone=2|lastcpupid=0x1fffff) [ 2245.467300] CPU: 7 PID: 10805 Comm: lvm Kdump: loaded Tainted: G B W 6.0.0-rc2 #19 [ 2245.476078] Hardware name: Dell Inc. PowerEdge R7525/0590KW, BIOS 2.5.6 10/06/2021 [ 2245.483646] Call Trace: [ 2245.486100] <TASK> [ 2245.488206] dump_stack_lvl+0x34/0x48 [ 2245.491878] slab_err+0x95/0xcd [ 2245.495028] __kmem_cache_shutdown.cold+0x31/0x136 [ 2245.499821] kmem_cache_destroy+0x49/0x130 [ 2245.503928] btracker_destroy+0x12/0x20 [dm_cache] [ 2245.508728] smq_destroy+0x15/0x60 [dm_cache_smq] [ 2245.513435] dm_cache_policy_destroy+0x12/0x20 [dm_cache] [ 2245.518834] destroy+0xc0/0x110 [dm_cache] [ 2245.522933] dm_table_destroy+0x5c/0x120 [dm_mod] [ 2245.527649] __dm_destroy+0x10e/0x1c0 [dm_mod] [ 2245.532102] dev_remove+0x117/0x190 [dm_mod] [ 2245.536384] ctl_ioctl+0x1a2/0x290 [dm_mod] [ 2245.540579] dm_ctl_ioctl+0xa/0x20 [dm_mod] [ 2245.544773] __x64_sys_ioctl+0x8a/0xc0 [ 2245.548524] do_syscall_64+0x5c/0x90 [ 2245.552104] ? syscall_exit_to_user_mode+0x12/0x30 [ 2245.556897] ? do_syscall_64+0x69/0x90 [ 2245.560648] ? do_syscall_64+0x69/0x90 [ 2245.564394] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 2245.569447] RIP: 0033:0x7fe52583ec6b ... 
[ 2245.646771] ------------[ cut here ]------------ [ 2245.651395] kmem_cache_destroy bt_work: Slab cache still has objects when called from btracker_destroy+0x12/0x20 [dm_cache] [ 2245.651408] WARNING: CPU: 7 PID: 10805 at mm/slab_common.c:478 kmem_cache_destroy+0x128/0x130 Found using: lvm2-testsuite --only "cache-single-split.sh" Ben bisected and found that commit 0495e337b703 ("mm/slab_common: Deleting kobject in kmem_cache_destroy() without holding slab_mutex/cpu_hotplug_lock") first exposed dm-cache's incomplete cleanup of its background tracker work objects. Reported-by: Benjamin Marzinski <bmarzins@redhat.com> Tested-by: Benjamin Marzinski <bmarzins@redhat.com> Cc: stable@vger.kernel.org # 6.0+ Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@kernel.org>
2023-01-26 17:59:10 +08:00
struct bt_work *w, *tmp;
BUG_ON(!list_empty(&b->issued));
list_for_each_entry_safe (w, tmp, &b->queued, list) {
list_del(&w->list);
kmem_cache_free(b->work_cache, w);
}
kmem_cache_destroy(b->work_cache);
kfree(b);
}
EXPORT_SYMBOL_GPL(btracker_destroy);
/*
 * Three-way comparison of two origin blocks.
 *
 * Returns -1 if lhs is below rhs, 1 if lhs is above rhs, 0 if they match.
 */
static int cmp_oblock(dm_oblock_t lhs, dm_oblock_t rhs)
{
	if (from_oblock(lhs) == from_oblock(rhs))
		return 0;

	return from_oblock(lhs) < from_oblock(rhs) ? -1 : 1;
}
/*
 * Add @nw to the tracker's 'pending' rb-tree, keyed by nw->work.oblock.
 *
 * Returns true if the node was linked in, false if the tree already holds
 * an entry for the same oblock (in which case @nw is left untouched).
 */
static bool __insert_pending(struct background_tracker *b,
			     struct bt_work *nw)
{
	struct rb_node **link = &b->pending.rb_node;
	struct rb_node *parent = NULL;

	while (*link) {
		struct bt_work *cur = container_of(*link, struct bt_work, node);
		int order = cmp_oblock(cur->work.oblock, nw->work.oblock);

		if (!order)
			return false;	/* already present */

		/*
		 * An existing node with a smaller oblock sends us left, a
		 * larger one sends us right.  __find_pending() descends the
		 * same way.
		 */
		parent = *link;
		link = order < 0 ? &parent->rb_left : &parent->rb_right;
	}

	rb_link_node(&nw->node, parent, link);
	rb_insert_color(&nw->node, &b->pending);

	return true;
}
/*
 * Look up the pending work item for @oblock.
 *
 * Returns the matching bt_work, or NULL if the 'pending' tree has no
 * entry for that block.
 */
static struct bt_work *__find_pending(struct background_tracker *b,
				      dm_oblock_t oblock)
{
	struct rb_node *n = b->pending.rb_node;

	while (n) {
		struct bt_work *candidate = container_of(n, struct bt_work, node);
		int order = cmp_oblock(candidate->work.oblock, oblock);

		if (!order)
			return candidate;

		/* Same descent direction as __insert_pending(). */
		n = order < 0 ? n->rb_left : n->rb_right;
	}

	return NULL;
}
/*
 * Adjust the pending counter that corresponds to w->op by @delta.
 *
 * An op that is none of promote, demote or writeback changes nothing.
 */
static void update_stats(struct background_tracker *b, struct policy_work *w, int delta)
{
	if (w->op == POLICY_PROMOTE)
		atomic_add(delta, &b->pending_promotes);
	else if (w->op == POLICY_DEMOTE)
		atomic_add(delta, &b->pending_demotes);
	else if (w->op == POLICY_WRITEBACK)
		atomic_add(delta, &b->pending_writebacks);
}
/*
 * Current value of the writeback counter.
 *
 * The counter is raised in btracker_queue() and lowered in
 * btracker_complete(), so it covers writebacks on both the 'queued' and
 * the 'issued' list.
 */
unsigned int btracker_nr_writebacks_queued(struct background_tracker *b)
{
	const int nr_writebacks = atomic_read(&b->pending_writebacks);

	return nr_writebacks;
}
EXPORT_SYMBOL_GPL(btracker_nr_writebacks_queued);
/*
 * Current value of the demotion counter.
 *
 * The counter is raised in btracker_queue() and lowered in
 * btracker_complete(), so it covers demotions on both the 'queued' and
 * the 'issued' list.
 */
unsigned int btracker_nr_demotions_queued(struct background_tracker *b)
{
	const int nr_demotions = atomic_read(&b->pending_demotes);

	return nr_demotions;
}
EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued);
/*
 * True once promotes + writebacks + demotes has reached b->max_work.
 */
static bool max_work_reached(struct background_tracker *b)
{
	int total = atomic_read(&b->pending_promotes);

	total += atomic_read(&b->pending_writebacks);
	total += atomic_read(&b->pending_demotes);

	return total >= b->max_work;
}
/*
 * Get a fresh bt_work from the tracker's slab cache.
 *
 * Returns NULL without allocating if the work limit has been reached, and
 * also NULL if the GFP_NOWAIT allocation itself fails.  The returned
 * object is not initialised.
 */
static struct bt_work *alloc_work(struct background_tracker *b)
{
	return max_work_reached(b) ?
		NULL : kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
}
/*
 * Start tracking a copy of @work.
 *
 * @b:     tracker to add the work to.
 * @work:  description to copy; the caller keeps ownership of it.
 * @pwork: optional.  When non-NULL the new item goes straight onto the
 *         'issued' list and *pwork is pointed at the tracker's copy.
 *         When NULL the item is placed on the 'queued' list instead.
 *         On any failure *pwork (if supplied) is left as NULL.
 *
 * Returns 0 on success, -ENOMEM if no bt_work could be obtained (limit
 * reached or allocation failure), or -EINVAL if work for the same oblock
 * is already pending.
 */
int btracker_queue(struct background_tracker *b,
		   struct policy_work *work,
		   struct policy_work **pwork)
{
	struct bt_work *item;

	if (pwork)
		*pwork = NULL;

	item = alloc_work(b);
	if (!item)
		return -ENOMEM;

	memcpy(&item->work, work, sizeof(*work));

	if (!__insert_pending(b, item)) {
		/*
		 * Lost a race: this oblock already has work pending, so
		 * the duplicate request is simply dropped.
		 */
		kmem_cache_free(b->work_cache, item);
		return -EINVAL;
	}

	if (!pwork) {
		list_add(&item->list, &b->queued);
	} else {
		*pwork = &item->work;
		list_add(&item->list, &b->issued);
	}

	update_stats(b, &item->work, 1);

	return 0;
}
EXPORT_SYMBOL_GPL(btracker_queue);
/*
 * Hand out the entry at the head of the 'queued' list.
 *
 * On success the entry is moved to the 'issued' list, *work points at its
 * embedded policy_work, and 0 is returned.
 *
 * Returns -ENODATA if there's no work; *work is not written in that case.
 */
int btracker_issue(struct background_tracker *b, struct policy_work **work)
{
	struct bt_work *next;

	if (list_empty(&b->queued))
		return -ENODATA;

	next = list_first_entry(&b->queued, struct bt_work, list);
	*work = &next->work;
	list_move(&next->list, &b->issued);

	return 0;
}
EXPORT_SYMBOL_GPL(btracker_issue);
/*
 * Stop tracking a finished piece of work and free it.
 *
 * @op must be the policy_work embedded in a bt_work owned by @b (the
 * pointer produced by btracker_queue() or btracker_issue()), because the
 * enclosing bt_work is recovered with container_of().  @op is invalid
 * once this returns.
 */
void btracker_complete(struct background_tracker *b,
		       struct policy_work *op)
{
	struct bt_work *done = container_of(op, struct bt_work, work);

	/* Unhook from the tree and from whichever list it is on. */
	rb_erase(&done->node, &b->pending);
	list_del(&done->list);

	/* Reads done->work.op, so it has to happen before the free. */
	update_stats(b, &done->work, -1);

	kmem_cache_free(b->work_cache, done);
}
EXPORT_SYMBOL_GPL(btracker_complete);
/*
 * Report whether the tracker holds pending work for @oblock.
 *
 * The lookup is by oblock only and does not inspect the operation type,
 * so any pending work for the block gives true, not just a promotion.
 */
bool btracker_promotion_already_present(struct background_tracker *b,
					dm_oblock_t oblock)
{
	struct bt_work *found = __find_pending(b, oblock);

	return found ? true : false;
}
EXPORT_SYMBOL_GPL(btracker_promotion_already_present);
/*----------------------------------------------------------------*/