cifs: change iface_list from array to sorted linked list

A server's published interface list can change over time, and needs
to be updated. We've storing iface_list as a simple array, which
makes it difficult to manipulate an existing list.

With this change, iface_list is modified into a linked list of
interfaces, which is kept sorted by speed.

Also added a reference counter for an iface entry, so that each
channel can maintain a backpointer to the iface and drop it
easily when needed.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
This commit is contained in:
Shyam Prasad N 2022-01-01 12:50:21 +00:00 committed by Steve French
parent 9de74996a7
commit aa45dadd34
6 changed files with 231 additions and 159 deletions

View File

@ -162,6 +162,8 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr);
else if (iface->sockaddr.ss_family == AF_INET6)
seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr);
if (!iface->is_active)
seq_puts(m, "\t\t[for-cleanup]\n");
}
static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
@ -221,6 +223,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifs_server_iface *iface;
int c, i, j;
seq_puts(m,
@ -456,11 +459,10 @@ skip_rdma:
if (ses->iface_count)
seq_printf(m, "\n\n\tServer interfaces: %zu",
ses->iface_count);
for (j = 0; j < ses->iface_count; j++) {
struct cifs_server_iface *iface;
iface = &ses->iface_list[j];
seq_printf(m, "\n\t%d)", j+1);
j = 0;
list_for_each_entry(iface, &ses->iface_list,
iface_head) {
seq_printf(m, "\n\t%d)", ++j);
cifs_dump_iface(m, iface);
if (is_ses_using_iface(ses, iface))
seq_puts(m, "\t\t[CONNECTED]\n");

View File

@ -933,15 +933,67 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
#endif
struct cifs_server_iface {
struct list_head iface_head;
struct kref refcount;
size_t speed;
unsigned int rdma_capable : 1;
unsigned int rss_capable : 1;
unsigned int is_active : 1; /* unset if non existent */
struct sockaddr_storage sockaddr;
};
/* release iface when last ref is dropped */
static inline void
release_iface(struct kref *ref)
{
struct cifs_server_iface *iface = container_of(ref,
struct cifs_server_iface,
refcount);
list_del_init(&iface->iface_head);
kfree(iface);
}
/*
* compare two interfaces a and b
* return 0 if everything matches.
* return 1 if a has higher link speed, or rdma capable, or rss capable
* return -1 otherwise.
*/
static inline int
iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b)
{
int cmp_ret = 0;
WARN_ON(!a || !b);
if (a->speed == b->speed) {
if (a->rdma_capable == b->rdma_capable) {
if (a->rss_capable == b->rss_capable) {
cmp_ret = memcmp(&a->sockaddr, &b->sockaddr,
sizeof(a->sockaddr));
if (!cmp_ret)
return 0;
else if (cmp_ret > 0)
return 1;
else
return -1;
} else if (a->rss_capable > b->rss_capable)
return 1;
else
return -1;
} else if (a->rdma_capable > b->rdma_capable)
return 1;
else
return -1;
} else if (a->speed > b->speed)
return 1;
else
return -1;
}
struct cifs_chan {
unsigned int in_reconnect : 1; /* if session setup in progress for this channel */
struct TCP_Server_Info *server;
struct cifs_server_iface *iface; /* interface in use */
__u8 signkey[SMB3_SIGN_KEY_SIZE];
};
@ -993,7 +1045,7 @@ struct cifs_ses {
*/
spinlock_t iface_lock;
/* ========= begin: protected by iface_lock ======== */
struct cifs_server_iface *iface_list;
struct list_head iface_list;
size_t iface_count;
unsigned long iface_last_update; /* jiffies */
/* ========= end: protected by iface_lock ======== */

View File

@ -1894,9 +1894,11 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
int i;
for (i = 1; i < chan_count; i++) {
spin_unlock(&ses->chan_lock);
if (ses->chans[i].iface) {
kref_put(&ses->chans[i].iface->refcount, release_iface);
ses->chans[i].iface = NULL;
}
cifs_put_tcp_session(ses->chans[i].server, 0);
spin_lock(&ses->chan_lock);
ses->chans[i].server = NULL;
}
}

View File

@ -75,6 +75,7 @@ sesInfoAlloc(void)
INIT_LIST_HEAD(&ret_buf->tcon_list);
mutex_init(&ret_buf->session_mutex);
spin_lock_init(&ret_buf->iface_lock);
INIT_LIST_HEAD(&ret_buf->iface_list);
spin_lock_init(&ret_buf->chan_lock);
}
return ret_buf;
@ -83,6 +84,8 @@ sesInfoAlloc(void)
void
sesInfoFree(struct cifs_ses *buf_to_free)
{
struct cifs_server_iface *iface = NULL, *niface = NULL;
if (buf_to_free == NULL) {
cifs_dbg(FYI, "Null buffer passed to sesInfoFree\n");
return;
@ -96,7 +99,11 @@ sesInfoFree(struct cifs_ses *buf_to_free)
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
kfree_sensitive(buf_to_free->auth_key.response);
kfree(buf_to_free->iface_list);
spin_lock(&buf_to_free->iface_lock);
list_for_each_entry_safe(iface, niface, &buf_to_free->iface_list,
iface_head)
kref_put(&iface->refcount, release_iface);
spin_unlock(&buf_to_free->iface_lock);
kfree_sensitive(buf_to_free);
}

View File

@ -58,7 +58,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
spin_lock(&ses->chan_lock);
for (i = 0; i < ses->chan_count; i++) {
if (is_server_using_iface(ses->chans[i].server, iface)) {
if (ses->chans[i].iface == iface) {
spin_unlock(&ses->chan_lock);
return true;
}
@ -151,11 +151,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
{
int old_chan_count, new_chan_count;
int left;
int i = 0;
int rc = 0;
int tries = 0;
struct cifs_server_iface *ifaces = NULL;
size_t iface_count;
struct cifs_server_iface *iface = NULL, *niface = NULL;
spin_lock(&ses->chan_lock);
@ -184,33 +182,17 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
}
spin_unlock(&ses->chan_lock);
/*
* Make a copy of the iface list at the time and use that
* instead so as to not hold the iface spinlock for opening
* channels
*/
spin_lock(&ses->iface_lock);
iface_count = ses->iface_count;
if (iface_count <= 0) {
spin_unlock(&ses->iface_lock);
cifs_dbg(VFS, "no iface list available to open channels\n");
return 0;
}
ifaces = kmemdup(ses->iface_list, iface_count*sizeof(*ifaces),
GFP_ATOMIC);
if (!ifaces) {
spin_unlock(&ses->iface_lock);
return 0;
}
spin_unlock(&ses->iface_lock);
/*
* Keep connecting to same, fastest, iface for all channels as
* long as its RSS. Try next fastest one if not RSS or channel
* creation fails.
*/
spin_lock(&ses->iface_lock);
iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
iface_head);
spin_unlock(&ses->iface_lock);
while (left > 0) {
struct cifs_server_iface *iface;
tries++;
if (tries > 3*ses->chan_max) {
@ -219,27 +201,46 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
break;
}
iface = &ifaces[i];
if (is_ses_using_iface(ses, iface) && !iface->rss_capable) {
i = (i+1) % iface_count;
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
break;
}
list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
iface_head) {
/* skip ifaces that are unusable */
if (!iface->is_active ||
(is_ses_using_iface(ses, iface) &&
!iface->rss_capable)) {
continue;
}
/* take ref before unlock */
kref_get(&iface->refcount);
spin_unlock(&ses->iface_lock);
rc = cifs_ses_add_channel(cifs_sb, ses, iface);
spin_lock(&ses->iface_lock);
if (rc) {
cifs_dbg(FYI, "failed to open extra channel on iface#%d rc=%d\n",
i, rc);
i = (i+1) % iface_count;
cifs_dbg(VFS, "failed to open extra channel on iface:%pIS rc=%d\n",
&iface->sockaddr,
rc);
kref_put(&iface->refcount, release_iface);
continue;
}
cifs_dbg(FYI, "successfully opened new channel on iface#%d\n",
i);
cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
&iface->sockaddr);
break;
}
spin_unlock(&ses->iface_lock);
left--;
new_chan_count++;
}
kfree(ifaces);
return new_chan_count - old_chan_count;
}
@ -355,6 +356,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
spin_unlock(&ses->chan_lock);
goto out;
}
chan->iface = iface;
ses->chan_count++;
atomic_set(&ses->chan_seq, 0);

View File

@ -512,30 +512,135 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
static int
parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
size_t buf_len,
struct cifs_server_iface **iface_list,
size_t *iface_count)
struct cifs_ses *ses)
{
struct network_interface_info_ioctl_rsp *p;
struct sockaddr_in *addr4;
struct sockaddr_in6 *addr6;
struct iface_info_ipv4 *p4;
struct iface_info_ipv6 *p6;
struct cifs_server_iface *info;
struct cifs_server_iface *info = NULL, *iface = NULL, *niface = NULL;
struct cifs_server_iface tmp_iface;
ssize_t bytes_left;
size_t next = 0;
int nb_iface = 0;
int rc = 0;
*iface_list = NULL;
*iface_count = 0;
/*
* Fist pass: count and sanity check
*/
int rc = 0, ret = 0;
bytes_left = buf_len;
p = buf;
spin_lock(&ses->iface_lock);
/*
* Go through iface_list and do kref_put to remove
* any unused ifaces. ifaces in use will be removed
* when the last user calls a kref_put on it
*/
list_for_each_entry_safe(iface, niface, &ses->iface_list,
iface_head) {
iface->is_active = 0;
kref_put(&iface->refcount, release_iface);
}
spin_unlock(&ses->iface_lock);
while (bytes_left >= sizeof(*p)) {
memset(&tmp_iface, 0, sizeof(tmp_iface));
tmp_iface.speed = le64_to_cpu(p->LinkSpeed);
tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
tmp_iface.rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0;
switch (p->Family) {
/*
* The kernel and wire socket structures have the same
* layout and use network byte order but make the
* conversion explicit in case either one changes.
*/
case INTERNETWORK:
addr4 = (struct sockaddr_in *)&tmp_iface.sockaddr;
p4 = (struct iface_info_ipv4 *)p->Buffer;
addr4->sin_family = AF_INET;
memcpy(&addr4->sin_addr, &p4->IPv4Address, 4);
/* [MS-SMB2] 2.2.32.5.1.1 Clients MUST ignore these */
addr4->sin_port = cpu_to_be16(CIFS_PORT);
cifs_dbg(FYI, "%s: ipv4 %pI4\n", __func__,
&addr4->sin_addr);
break;
case INTERNETWORKV6:
addr6 = (struct sockaddr_in6 *)&tmp_iface.sockaddr;
p6 = (struct iface_info_ipv6 *)p->Buffer;
addr6->sin6_family = AF_INET6;
memcpy(&addr6->sin6_addr, &p6->IPv6Address, 16);
/* [MS-SMB2] 2.2.32.5.1.2 Clients MUST ignore these */
addr6->sin6_flowinfo = 0;
addr6->sin6_scope_id = 0;
addr6->sin6_port = cpu_to_be16(CIFS_PORT);
cifs_dbg(FYI, "%s: ipv6 %pI6\n", __func__,
&addr6->sin6_addr);
break;
default:
cifs_dbg(VFS,
"%s: skipping unsupported socket family\n",
__func__);
goto next_iface;
}
/*
* The iface_list is assumed to be sorted by speed.
* Check if the new interface exists in that list.
* NEVER change iface. it could be in use.
* Add a new one instead
*/
spin_lock(&ses->iface_lock);
iface = niface = NULL;
list_for_each_entry_safe(iface, niface, &ses->iface_list,
iface_head) {
ret = iface_cmp(iface, &tmp_iface);
if (!ret) {
/* just get a ref so that it doesn't get picked/freed */
iface->is_active = 1;
kref_get(&iface->refcount);
spin_unlock(&ses->iface_lock);
goto next_iface;
} else if (ret < 0) {
/* all remaining ifaces are slower */
kref_get(&iface->refcount);
break;
}
}
spin_unlock(&ses->iface_lock);
/* no match. insert the entry in the list */
info = kmalloc(sizeof(struct cifs_server_iface),
GFP_KERNEL);
if (!info) {
rc = -ENOMEM;
goto out;
}
memcpy(info, &tmp_iface, sizeof(tmp_iface));
/* add this new entry to the list */
kref_init(&info->refcount);
info->is_active = 1;
cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, ses->iface_count);
cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed);
cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__,
le32_to_cpu(p->Capability));
spin_lock(&ses->iface_lock);
if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) {
list_add_tail(&info->iface_head, &iface->iface_head);
kref_put(&iface->refcount, release_iface);
} else
list_add_tail(&info->iface_head, &ses->iface_list);
spin_unlock(&ses->iface_lock);
ses->iface_count++;
ses->iface_last_update = jiffies;
next_iface:
nb_iface++;
next = le32_to_cpu(p->Next);
if (!next) {
@ -557,108 +662,21 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
/*
* Second pass: extract info to internal structure
*/
*iface_list = kcalloc(nb_iface, sizeof(**iface_list), GFP_KERNEL);
if (!*iface_list) {
rc = -ENOMEM;
goto out;
}
info = *iface_list;
bytes_left = buf_len;
p = buf;
while (bytes_left >= sizeof(*p)) {
info->speed = le64_to_cpu(p->LinkSpeed);
info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0;
cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count);
cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed);
cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__,
le32_to_cpu(p->Capability));
switch (p->Family) {
/*
* The kernel and wire socket structures have the same
* layout and use network byte order but make the
* conversion explicit in case either one changes.
*/
case INTERNETWORK:
addr4 = (struct sockaddr_in *)&info->sockaddr;
p4 = (struct iface_info_ipv4 *)p->Buffer;
addr4->sin_family = AF_INET;
memcpy(&addr4->sin_addr, &p4->IPv4Address, 4);
/* [MS-SMB2] 2.2.32.5.1.1 Clients MUST ignore these */
addr4->sin_port = cpu_to_be16(CIFS_PORT);
cifs_dbg(FYI, "%s: ipv4 %pI4\n", __func__,
&addr4->sin_addr);
break;
case INTERNETWORKV6:
addr6 = (struct sockaddr_in6 *)&info->sockaddr;
p6 = (struct iface_info_ipv6 *)p->Buffer;
addr6->sin6_family = AF_INET6;
memcpy(&addr6->sin6_addr, &p6->IPv6Address, 16);
/* [MS-SMB2] 2.2.32.5.1.2 Clients MUST ignore these */
addr6->sin6_flowinfo = 0;
addr6->sin6_scope_id = 0;
addr6->sin6_port = cpu_to_be16(CIFS_PORT);
cifs_dbg(FYI, "%s: ipv6 %pI6\n", __func__,
&addr6->sin6_addr);
break;
default:
cifs_dbg(VFS,
"%s: skipping unsupported socket family\n",
__func__);
goto next_iface;
}
(*iface_count)++;
info++;
next_iface:
next = le32_to_cpu(p->Next);
if (!next)
break;
p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next);
bytes_left -= next;
}
if (!*iface_count) {
if (!ses->iface_count) {
rc = -EINVAL;
goto out;
}
out:
if (rc) {
kfree(*iface_list);
*iface_count = 0;
*iface_list = NULL;
}
return rc;
}
static int compare_iface(const void *ia, const void *ib)
{
const struct cifs_server_iface *a = (struct cifs_server_iface *)ia;
const struct cifs_server_iface *b = (struct cifs_server_iface *)ib;
return a->speed == b->speed ? 0 : (a->speed > b->speed ? -1 : 1);
}
static int
SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
{
int rc;
unsigned int ret_data_len = 0;
struct network_interface_info_ioctl_rsp *out_buf = NULL;
struct cifs_server_iface *iface_list;
size_t iface_count;
struct cifs_ses *ses = tcon->ses;
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
@ -674,21 +692,10 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
goto out;
}
rc = parse_server_interfaces(out_buf, ret_data_len,
&iface_list, &iface_count);
rc = parse_server_interfaces(out_buf, ret_data_len, ses);
if (rc)
goto out;
/* sort interfaces from fastest to slowest */
sort(iface_list, iface_count, sizeof(*iface_list), compare_iface, NULL);
spin_lock(&ses->iface_lock);
kfree(ses->iface_list);
ses->iface_list = iface_list;
ses->iface_count = iface_count;
ses->iface_last_update = jiffies;
spin_unlock(&ses->iface_lock);
out:
kfree(out_buf);
return rc;