
dm ioctl: replace device hash with red-black tree

For high numbers of DM devices the 64-entry hash table has non-trivial
overhead. Fix this by replacing the hash table with a red-black tree.

Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Authored by Mikulas Patocka, 2021-03-10 05:18:03 -05:00; committed by Mike Snitzer
parent 219a9b5e73
commit b82096afc8
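
Background note (not part of the commit): the change below drops the open-coded 64-bucket hash and keys both device indexes off the kernel's intrusive red-black tree API from <linux/rbtree.h>. For readers unfamiliar with that API, here is a minimal sketch of the same string-keyed lookup/insert pattern; the identifiers demo_cell, demo_tree, demo_lookup and demo_insert are illustrative only and do not appear in the patch.

#include <linux/rbtree.h>
#include <linux/string.h>

/* Illustrative cell keyed by a string; the rb_node is embedded in the
 * object itself, so no separate node allocation is needed. */
struct demo_cell {
	struct rb_node node;
	char *name;
};

static struct rb_root demo_tree = RB_ROOT;

/* O(log n) lookup: descend left or right according to strcmp(). */
static struct demo_cell *demo_lookup(const char *name)
{
	struct rb_node *n = demo_tree.rb_node;

	while (n) {
		struct demo_cell *c = container_of(n, struct demo_cell, node);
		int cmp = strcmp(c->name, name);

		if (!cmp)
			return c;
		n = cmp >= 0 ? n->rb_left : n->rb_right;
	}
	return NULL;
}

/* Insert: walk to the leaf position, link the node there, then let the
 * rb-tree core recolor and rebalance. */
static void demo_insert(struct demo_cell *new_c)
{
	struct rb_node **link = &demo_tree.rb_node, *parent = NULL;

	while (*link) {
		struct demo_cell *c = container_of(*link, struct demo_cell, node);

		parent = *link;
		link = strcmp(c->name, new_c->name) >= 0 ?
			&(*link)->rb_left : &(*link)->rb_right;
	}
	rb_link_node(&new_c->node, parent, link);
	rb_insert_color(&new_c->node, &demo_tree);
}

The diff applies this pattern twice, once for a tree keyed by device name and once for a tree keyed by uuid, and additionally uses rb_erase() to unlink a cell and rb_first()/rb_next() to walk the name tree in order in dm_hash_remove_all() and list_devices().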

drivers/md/dm-ioctl.c

@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
+#include <linux/rbtree.h>
 #include <linux/dm-ioctl.h>
 #include <linux/hdreg.h>
 #include <linux/compat.h>
@@ -36,8 +37,10 @@ struct dm_file {
  * name or uuid.
  *---------------------------------------------------------------*/
 struct hash_cell {
-	struct list_head name_list;
-	struct list_head uuid_list;
+	struct rb_node name_node;
+	struct rb_node uuid_node;
+	bool name_set;
+	bool uuid_set;
 
 	char *name;
 	char *uuid;
@@ -53,10 +56,8 @@
 };
 
 
-#define NUM_BUCKETS 64
-#define MASK_BUCKETS (NUM_BUCKETS - 1)
-static struct list_head _name_buckets[NUM_BUCKETS];
-static struct list_head _uuid_buckets[NUM_BUCKETS];
+static struct rb_root name_rb_tree = RB_ROOT;
+static struct rb_root uuid_rb_tree = RB_ROOT;
 
 static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred);
 
@@ -70,73 +71,110 @@ static DECLARE_RWSEM(_hash_lock);
  */
 static DEFINE_MUTEX(dm_hash_cells_mutex);
 
-static void init_buckets(struct list_head *buckets)
-{
-	unsigned int i;
-
-	for (i = 0; i < NUM_BUCKETS; i++)
-		INIT_LIST_HEAD(buckets + i);
-}
-
-static int dm_hash_init(void)
-{
-	init_buckets(_name_buckets);
-	init_buckets(_uuid_buckets);
-	return 0;
-}
-
 static void dm_hash_exit(void)
 {
 	dm_hash_remove_all(false, false, false);
 }
 
-/*-----------------------------------------------------------------
- * Hash function:
- * We're not really concerned with the str hash function being
- * fast since it's only used by the ioctl interface.
- *---------------------------------------------------------------*/
-static unsigned int hash_str(const char *str)
-{
-	const unsigned int hash_mult = 2654435387U;
-	unsigned int h = 0;
-
-	while (*str)
-		h = (h + (unsigned int) *str++) * hash_mult;
-
-	return h & MASK_BUCKETS;
-}
-
 /*-----------------------------------------------------------------
  * Code for looking up a device by name
  *---------------------------------------------------------------*/
 static struct hash_cell *__get_name_cell(const char *str)
 {
-	struct hash_cell *hc;
-	unsigned int h = hash_str(str);
+	struct rb_node *n = name_rb_tree.rb_node;
 
-	list_for_each_entry (hc, _name_buckets + h, name_list)
-		if (!strcmp(hc->name, str)) {
+	while (n) {
+		struct hash_cell *hc = container_of(n, struct hash_cell, name_node);
+		int c = strcmp(hc->name, str);
+		if (!c) {
 			dm_get(hc->md);
 			return hc;
 		}
+		n = c >= 0 ? n->rb_left : n->rb_right;
+	}
 
 	return NULL;
 }
 
 static struct hash_cell *__get_uuid_cell(const char *str)
 {
-	struct hash_cell *hc;
-	unsigned int h = hash_str(str);
+	struct rb_node *n = uuid_rb_tree.rb_node;
 
-	list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
-		if (!strcmp(hc->uuid, str)) {
+	while (n) {
+		struct hash_cell *hc = container_of(n, struct hash_cell, uuid_node);
+		int c = strcmp(hc->uuid, str);
+		if (!c) {
 			dm_get(hc->md);
 			return hc;
 		}
+		n = c >= 0 ? n->rb_left : n->rb_right;
+	}
 
 	return NULL;
 }
 
+static void __unlink_name(struct hash_cell *hc)
+{
+	if (hc->name_set) {
+		hc->name_set = false;
+		rb_erase(&hc->name_node, &name_rb_tree);
+	}
+}
+
+static void __unlink_uuid(struct hash_cell *hc)
+{
+	if (hc->uuid_set) {
+		hc->uuid_set = false;
+		rb_erase(&hc->uuid_node, &uuid_rb_tree);
+	}
+}
+
+static void __link_name(struct hash_cell *new_hc)
+{
+	struct rb_node **n, *parent;
+
+	__unlink_name(new_hc);
+
+	new_hc->name_set = true;
+
+	n = &name_rb_tree.rb_node;
+	parent = NULL;
+
+	while (*n) {
+		struct hash_cell *hc = container_of(*n, struct hash_cell, name_node);
+		int c = strcmp(hc->name, new_hc->name);
+		BUG_ON(!c);
+		parent = *n;
+		n = c >= 0 ? &hc->name_node.rb_left : &hc->name_node.rb_right;
+	}
+
+	rb_link_node(&new_hc->name_node, parent, n);
+	rb_insert_color(&new_hc->name_node, &name_rb_tree);
+}
+
+static void __link_uuid(struct hash_cell *new_hc)
+{
+	struct rb_node **n, *parent;
+
+	__unlink_uuid(new_hc);
+
+	new_hc->uuid_set = true;
+
+	n = &uuid_rb_tree.rb_node;
+	parent = NULL;
+
+	while (*n) {
+		struct hash_cell *hc = container_of(*n, struct hash_cell, uuid_node);
+		int c = strcmp(hc->uuid, new_hc->uuid);
+		BUG_ON(!c);
+		parent = *n;
+		n = c > 0 ? &hc->uuid_node.rb_left : &hc->uuid_node.rb_right;
+	}
+
+	rb_link_node(&new_hc->uuid_node, parent, n);
+	rb_insert_color(&new_hc->uuid_node, &uuid_rb_tree);
+}
+
 static struct hash_cell *__get_dev_cell(uint64_t dev)
 {
 	struct mapped_device *md;
@@ -185,8 +223,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid,
 		}
 	}
 
-	INIT_LIST_HEAD(&hc->name_list);
-	INIT_LIST_HEAD(&hc->uuid_list);
+	hc->name_set = hc->uuid_set = false;
 	hc->md = md;
 	hc->new_map = NULL;
 	return hc;
@@ -226,16 +263,16 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi
 		goto bad;
 	}
 
-	list_add(&cell->name_list, _name_buckets + hash_str(name));
+	__link_name(cell);
 
 	if (uuid) {
 		hc = __get_uuid_cell(uuid);
 		if (hc) {
-			list_del(&cell->name_list);
+			__unlink_name(cell);
 			dm_put(hc->md);
 			goto bad;
 		}
-		list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
+		__link_uuid(cell);
 	}
 	dm_get(md);
 	mutex_lock(&dm_hash_cells_mutex);
@@ -256,9 +293,9 @@ static struct dm_table *__hash_remove(struct hash_cell *hc)
 	struct dm_table *table;
 	int srcu_idx;
 
-	/* remove from the dev hash */
-	list_del(&hc->uuid_list);
-	list_del(&hc->name_list);
+	/* remove from the dev trees */
+	__unlink_name(hc);
+	__unlink_uuid(hc);
 	mutex_lock(&dm_hash_cells_mutex);
 	dm_set_mdptr(hc->md, NULL);
 	mutex_unlock(&dm_hash_cells_mutex);
@@ -279,7 +316,8 @@ static struct dm_table *__hash_remove(struct hash_cell *hc)
 
 static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred)
 {
-	int i, dev_skipped;
+	int dev_skipped;
+	struct rb_node *n;
 	struct hash_cell *hc;
 	struct mapped_device *md;
 	struct dm_table *t;
@@ -289,40 +327,39 @@ retry:
 
 	down_write(&_hash_lock);
 
-	for (i = 0; i < NUM_BUCKETS; i++) {
-		list_for_each_entry(hc, _name_buckets + i, name_list) {
-			md = hc->md;
-			dm_get(md);
+	for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
+		hc = container_of(n, struct hash_cell, name_node);
+		md = hc->md;
+		dm_get(md);
 
-			if (keep_open_devices &&
-			    dm_lock_for_deletion(md, mark_deferred, only_deferred)) {
-				dm_put(md);
-				dev_skipped++;
-				continue;
-			}
+		if (keep_open_devices &&
+		    dm_lock_for_deletion(md, mark_deferred, only_deferred)) {
+			dm_put(md);
+			dev_skipped++;
+			continue;
+		}
 
-			t = __hash_remove(hc);
+		t = __hash_remove(hc);
 
-			up_write(&_hash_lock);
+		up_write(&_hash_lock);
 
-			if (t) {
-				dm_sync_table(md);
-				dm_table_destroy(t);
-			}
-			dm_put(md);
-			if (likely(keep_open_devices))
-				dm_destroy(md);
-			else
-				dm_destroy_immediate(md);
+		if (t) {
+			dm_sync_table(md);
+			dm_table_destroy(t);
+		}
+		dm_put(md);
+		if (likely(keep_open_devices))
+			dm_destroy(md);
+		else
+			dm_destroy_immediate(md);
 
-			/*
-			 * Some mapped devices may be using other mapped
-			 * devices, so repeat until we make no further
-			 * progress. If a new mapped device is created
-			 * here it will also get removed.
-			 */
-			goto retry;
-		}
+		/*
+		 * Some mapped devices may be using other mapped
+		 * devices, so repeat until we make no further
+		 * progress. If a new mapped device is created
+		 * here it will also get removed.
+		 */
+		goto retry;
 	}
 
 	up_write(&_hash_lock);
@@ -340,7 +377,7 @@ static void __set_cell_uuid(struct hash_cell *hc, char *new_uuid)
 	hc->uuid = new_uuid;
 	mutex_unlock(&dm_hash_cells_mutex);
 
-	list_add(&hc->uuid_list, _uuid_buckets + hash_str(new_uuid));
+	__link_uuid(hc);
 }
 
 /*
@@ -354,14 +391,14 @@ static char *__change_cell_name(struct hash_cell *hc, char *new_name)
 	/*
 	 * Rename and move the name cell.
 	 */
-	list_del(&hc->name_list);
+	__unlink_name(hc);
 	old_name = hc->name;
 
 	mutex_lock(&dm_hash_cells_mutex);
 	hc->name = new_name;
 	mutex_unlock(&dm_hash_cells_mutex);
 
-	list_add(&hc->name_list, _name_buckets + hash_str(new_name));
+	__link_name(hc);
 
 	return old_name;
 }
@@ -505,7 +542,7 @@ static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
 
 static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
-	unsigned int i;
+	struct rb_node *n;
 	struct hash_cell *hc;
 	size_t len, needed = 0;
 	struct gendisk *disk;
@@ -518,11 +555,10 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size)
 	 * Loop through all the devices working out how much
 	 * space we need.
 	 */
-	for (i = 0; i < NUM_BUCKETS; i++) {
-		list_for_each_entry (hc, _name_buckets + i, name_list) {
-			needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1);
-			needed += align_val(sizeof(uint32_t));
-		}
+	for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
+		hc = container_of(n, struct hash_cell, name_node);
+		needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1);
+		needed += align_val(sizeof(uint32_t));
 	}
 
 	/*
@@ -540,21 +576,20 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size)
 	/*
 	 * Now loop through filling out the names.
 	 */
-	for (i = 0; i < NUM_BUCKETS; i++) {
-		list_for_each_entry (hc, _name_buckets + i, name_list) {
-			if (old_nl)
-				old_nl->next = (uint32_t) ((void *) nl -
-							   (void *) old_nl);
-			disk = dm_disk(hc->md);
-			nl->dev = huge_encode_dev(disk_devt(disk));
-			nl->next = 0;
-			strcpy(nl->name, hc->name);
+	for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
+		hc = container_of(n, struct hash_cell, name_node);
+		if (old_nl)
+			old_nl->next = (uint32_t) ((void *) nl -
+						   (void *) old_nl);
+		disk = dm_disk(hc->md);
+		nl->dev = huge_encode_dev(disk_devt(disk));
+		nl->next = 0;
+		strcpy(nl->name, hc->name);
 
-			old_nl = nl;
-			event_nr = align_ptr(nl->name + strlen(hc->name) + 1);
-			*event_nr = dm_get_event_nr(hc->md);
-			nl = align_ptr(event_nr + 1);
-		}
+		old_nl = nl;
+		event_nr = align_ptr(nl->name + strlen(hc->name) + 1);
+		*event_nr = dm_get_event_nr(hc->md);
+		nl = align_ptr(event_nr + 1);
 	}
 	/*
 	 * If mismatch happens, security may be compromised due to buffer
@@ -1991,14 +2026,9 @@ int __init dm_interface_init(void)
 {
 	int r;
 
-	r = dm_hash_init();
-	if (r)
-		return r;
-
 	r = misc_register(&_dm_misc);
 	if (r) {
 		DMERR("misc_register failed for control device");
-		dm_hash_exit();
 		return r;
 	}