mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 21:38:32 +08:00
libceph: crush_location infrastructure
Allow expressing client's location in terms of CRUSH hierarchy as a set of (bucket type name, bucket name) pairs.

The userspace syntax "crush_location = key1=value1 key2=value2" is incompatible with mount options and needed adaptation. Key-value pairs are separated by '|' and we use ':' instead of '=' to separate keys from values. So for:

    crush_location = host=foo rack=bar

one would write:

    crush_location=host:foo|rack:bar

As in userspace, "multipath" locations are supported, so indicating locality for parallel hierarchies is possible:

    crush_location=rack:foo1|rack:foo2|datacenter:bar

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
This commit is contained in:
parent
86403a92c3
commit
45e6aa9f55
@ -64,6 +64,7 @@ struct ceph_options {
|
|||||||
int num_mon;
|
int num_mon;
|
||||||
char *name;
|
char *name;
|
||||||
struct ceph_crypto_key *key;
|
struct ceph_crypto_key *key;
|
||||||
|
struct rb_root crush_locs;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -302,9 +302,23 @@ bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
|
|||||||
int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
|
int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
|
||||||
const struct ceph_pg *raw_pgid);
|
const struct ceph_pg *raw_pgid);
|
||||||
|
|
||||||
|
/*
 * One CRUSH location: a (bucket type name, bucket name) pair,
 * e.g. ("rack", "foo1").  Both members are NUL-terminated strings.
 */
struct crush_loc {
	char *cl_type_name;	/* bucket type name, e.g. "rack" */
	char *cl_name;		/* bucket name, e.g. "foo1" */
};
|
||||||
|
|
||||||
|
struct crush_loc_node {
|
||||||
|
struct rb_node cl_node;
|
||||||
|
struct crush_loc cl_loc; /* pointers into cl_data */
|
||||||
|
char cl_data[];
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Parse "type:name|type:name|..." into @locs.  @crush_location is
 * modified in place.  Returns 0 or a negative errno; on error, entries
 * inserted before the failure remain in @locs.
 */
int ceph_parse_crush_location(char *crush_location, struct rb_root *locs);

/*
 * Compare two sets of locations in tree order.  Returns 0 if they hold
 * the same entries, non-zero otherwise.
 */
int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2);

/* Remove and free every entry in @locs, leaving it empty. */
void ceph_clear_crush_locs(struct rb_root *locs);
|
||||||
|
|
||||||
extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
|
extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
|
||||||
u64 id);
|
u64 id);
|
||||||
|
|
||||||
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
|
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
|
||||||
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
|
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
|
||||||
u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
|
u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
|
||||||
|
@ -176,6 +176,10 @@ int ceph_compare_options(struct ceph_options *new_opt,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ret = ceph_compare_crush_locs(&opt1->crush_locs, &opt2->crush_locs);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
/* any matching mon ip implies a match */
|
/* any matching mon ip implies a match */
|
||||||
for (i = 0; i < opt1->num_mon; i++) {
|
for (i = 0; i < opt1->num_mon; i++) {
|
||||||
if (ceph_monmap_contains(client->monc.monmap,
|
if (ceph_monmap_contains(client->monc.monmap,
|
||||||
@ -260,6 +264,7 @@ enum {
|
|||||||
Opt_secret,
|
Opt_secret,
|
||||||
Opt_key,
|
Opt_key,
|
||||||
Opt_ip,
|
Opt_ip,
|
||||||
|
Opt_crush_location,
|
||||||
/* string args above */
|
/* string args above */
|
||||||
Opt_share,
|
Opt_share,
|
||||||
Opt_crc,
|
Opt_crc,
|
||||||
@ -274,6 +279,7 @@ static const struct fs_parameter_spec ceph_parameters[] = {
|
|||||||
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
|
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
|
||||||
fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages),
|
fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages),
|
||||||
fsparam_flag_no ("crc", Opt_crc),
|
fsparam_flag_no ("crc", Opt_crc),
|
||||||
|
fsparam_string ("crush_location", Opt_crush_location),
|
||||||
fsparam_string ("fsid", Opt_fsid),
|
fsparam_string ("fsid", Opt_fsid),
|
||||||
fsparam_string ("ip", Opt_ip),
|
fsparam_string ("ip", Opt_ip),
|
||||||
fsparam_string ("key", Opt_key),
|
fsparam_string ("key", Opt_key),
|
||||||
@ -298,6 +304,7 @@ struct ceph_options *ceph_alloc_options(void)
|
|||||||
if (!opt)
|
if (!opt)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
opt->crush_locs = RB_ROOT;
|
||||||
opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
|
opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
if (!opt->mon_addr) {
|
if (!opt->mon_addr) {
|
||||||
@ -320,6 +327,7 @@ void ceph_destroy_options(struct ceph_options *opt)
|
|||||||
if (!opt)
|
if (!opt)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
ceph_clear_crush_locs(&opt->crush_locs);
|
||||||
kfree(opt->name);
|
kfree(opt->name);
|
||||||
if (opt->key) {
|
if (opt->key) {
|
||||||
ceph_crypto_key_destroy(opt->key);
|
ceph_crypto_key_destroy(opt->key);
|
||||||
@ -454,6 +462,16 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
|
|||||||
if (!opt->key)
|
if (!opt->key)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
return get_secret(opt->key, param->string, &log);
|
return get_secret(opt->key, param->string, &log);
|
||||||
|
case Opt_crush_location:
|
||||||
|
ceph_clear_crush_locs(&opt->crush_locs);
|
||||||
|
err = ceph_parse_crush_location(param->string,
|
||||||
|
&opt->crush_locs);
|
||||||
|
if (err) {
|
||||||
|
error_plog(&log, "Failed to parse CRUSH location: %d",
|
||||||
|
err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case Opt_osdtimeout:
|
case Opt_osdtimeout:
|
||||||
warn_plog(&log, "Ignoring osdtimeout");
|
warn_plog(&log, "Ignoring osdtimeout");
|
||||||
@ -536,6 +554,7 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
|
|||||||
{
|
{
|
||||||
struct ceph_options *opt = client->options;
|
struct ceph_options *opt = client->options;
|
||||||
size_t pos = m->count;
|
size_t pos = m->count;
|
||||||
|
struct rb_node *n;
|
||||||
|
|
||||||
if (opt->name) {
|
if (opt->name) {
|
||||||
seq_puts(m, "name=");
|
seq_puts(m, "name=");
|
||||||
@ -545,6 +564,23 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
|
|||||||
if (opt->key)
|
if (opt->key)
|
||||||
seq_puts(m, "secret=<hidden>,");
|
seq_puts(m, "secret=<hidden>,");
|
||||||
|
|
||||||
|
if (!RB_EMPTY_ROOT(&opt->crush_locs)) {
|
||||||
|
seq_puts(m, "crush_location=");
|
||||||
|
for (n = rb_first(&opt->crush_locs); ; ) {
|
||||||
|
struct crush_loc_node *loc =
|
||||||
|
rb_entry(n, struct crush_loc_node, cl_node);
|
||||||
|
|
||||||
|
seq_printf(m, "%s:%s", loc->cl_loc.cl_type_name,
|
||||||
|
loc->cl_loc.cl_name);
|
||||||
|
n = rb_next(n);
|
||||||
|
if (!n)
|
||||||
|
break;
|
||||||
|
|
||||||
|
seq_putc(m, '|');
|
||||||
|
}
|
||||||
|
seq_putc(m, ',');
|
||||||
|
}
|
||||||
|
|
||||||
if (opt->flags & CEPH_OPT_FSID)
|
if (opt->flags & CEPH_OPT_FSID)
|
||||||
seq_printf(m, "fsid=%pU,", &opt->fsid);
|
seq_printf(m, "fsid=%pU,", &opt->fsid);
|
||||||
if (opt->flags & CEPH_OPT_NOSHARE)
|
if (opt->flags & CEPH_OPT_NOSHARE)
|
||||||
|
@ -2715,3 +2715,119 @@ int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
|
|||||||
return acting.primary;
|
return acting.primary;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(ceph_pg_to_acting_primary);
|
EXPORT_SYMBOL(ceph_pg_to_acting_primary);
|
||||||
|
|
||||||
|
/*
 * Allocate a crush_loc_node with room in cl_data for a bucket type name
 * of @type_name_len bytes and a bucket name of @name_len bytes.
 *
 * The extra two bytes hold the NUL terminators of the two strings.
 * Only the rb-tree linkage is initialized here; the caller sets up
 * cl_loc and fills in cl_data.
 *
 * Returns the new node, or NULL if the allocation fails.
 */
static struct crush_loc_node *alloc_crush_loc(size_t type_name_len,
					      size_t name_len)
{
	struct crush_loc_node *loc;

	loc = kmalloc(sizeof(*loc) + type_name_len + name_len + 2, GFP_NOIO);
	if (!loc)
		return NULL;

	RB_CLEAR_NODE(&loc->cl_node);
	return loc;
}
|
||||||
|
|
||||||
|
static void free_crush_loc(struct crush_loc_node *loc)
|
||||||
|
{
|
||||||
|
WARN_ON(!RB_EMPTY_NODE(&loc->cl_node));
|
||||||
|
|
||||||
|
kfree(loc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int crush_loc_compare(const struct crush_loc *loc1,
|
||||||
|
const struct crush_loc *loc2)
|
||||||
|
{
|
||||||
|
return strcmp(loc1->cl_type_name, loc2->cl_type_name) ?:
|
||||||
|
strcmp(loc1->cl_name, loc2->cl_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Instantiate the rb-tree helpers for struct crush_loc_node, keyed by
 * the embedded cl_loc (passed by pointer) and ordered by
 * crush_loc_compare().  __insert_crush_loc() and erase_crush_loc(),
 * used in this file, come from this expansion.
 * NOTE(review): the macro is defined outside this file -- confirm the
 * exact set of generated helpers there.
 */
DEFINE_RB_FUNCS2(crush_loc, struct crush_loc_node, cl_loc, crush_loc_compare,
		 RB_BYPTR, const struct crush_loc *, cl_node)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Parses a set of <bucket type name>':'<bucket name> pairs separated
|
||||||
|
* by '|', e.g. "rack:foo1|rack:foo2|datacenter:bar".
|
||||||
|
*
|
||||||
|
* Note that @crush_location is modified by strsep().
|
||||||
|
*/
|
||||||
|
int ceph_parse_crush_location(char *crush_location, struct rb_root *locs)
|
||||||
|
{
|
||||||
|
struct crush_loc_node *loc;
|
||||||
|
const char *type_name, *name, *colon;
|
||||||
|
size_t type_name_len, name_len;
|
||||||
|
|
||||||
|
dout("%s '%s'\n", __func__, crush_location);
|
||||||
|
while ((type_name = strsep(&crush_location, "|"))) {
|
||||||
|
colon = strchr(type_name, ':');
|
||||||
|
if (!colon)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
type_name_len = colon - type_name;
|
||||||
|
if (type_name_len == 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
name = colon + 1;
|
||||||
|
name_len = strlen(name);
|
||||||
|
if (name_len == 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
loc = alloc_crush_loc(type_name_len, name_len);
|
||||||
|
if (!loc)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
loc->cl_loc.cl_type_name = loc->cl_data;
|
||||||
|
memcpy(loc->cl_loc.cl_type_name, type_name, type_name_len);
|
||||||
|
loc->cl_loc.cl_type_name[type_name_len] = '\0';
|
||||||
|
|
||||||
|
loc->cl_loc.cl_name = loc->cl_data + type_name_len + 1;
|
||||||
|
memcpy(loc->cl_loc.cl_name, name, name_len);
|
||||||
|
loc->cl_loc.cl_name[name_len] = '\0';
|
||||||
|
|
||||||
|
if (!__insert_crush_loc(locs, loc)) {
|
||||||
|
free_crush_loc(loc);
|
||||||
|
return -EEXIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
dout("%s type_name '%s' name '%s'\n", __func__,
|
||||||
|
loc->cl_loc.cl_type_name, loc->cl_loc.cl_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2)
|
||||||
|
{
|
||||||
|
struct rb_node *n1 = rb_first(locs1);
|
||||||
|
struct rb_node *n2 = rb_first(locs2);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
for ( ; n1 && n2; n1 = rb_next(n1), n2 = rb_next(n2)) {
|
||||||
|
struct crush_loc_node *loc1 =
|
||||||
|
rb_entry(n1, struct crush_loc_node, cl_node);
|
||||||
|
struct crush_loc_node *loc2 =
|
||||||
|
rb_entry(n2, struct crush_loc_node, cl_node);
|
||||||
|
|
||||||
|
ret = crush_loc_compare(&loc1->cl_loc, &loc2->cl_loc);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!n1 && n2)
|
||||||
|
return -1;
|
||||||
|
if (n1 && !n2)
|
||||||
|
return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ceph_clear_crush_locs(struct rb_root *locs)
|
||||||
|
{
|
||||||
|
while (!RB_EMPTY_ROOT(locs)) {
|
||||||
|
struct crush_loc_node *loc =
|
||||||
|
rb_entry(rb_first(locs), struct crush_loc_node, cl_node);
|
||||||
|
|
||||||
|
erase_crush_loc(locs, loc);
|
||||||
|
free_crush_loc(loc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user