linux/fs/autofs/waitq.c

514 lines
12 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
* Copyright 2001-2006 Ian Kent <raven@themaw.net>
*/
#include <linux/sched/signal.h>
#include "autofs_i.h"
/*
 * Next wait queue token to hand out.
 *
 * We make this a static variable rather than a part of the superblock; it
 * is better if we don't reassign numbers easily even across filesystems.
 * autofs_wait() never hands out 0: when the counter wraps it restarts at 1.
 */
static autofs_wqt_t autofs_next_wait_queue = 1;
/*
 * Put the mount into catatonic mode.
 *
 * Sets AUTOFS_SBI_CATATONIC, detaches every outstanding wait from
 * sbi->queues, completes each of them with -ENOENT and wakes its waiters,
 * then drops the reference to the daemon pipe.  If the mount is already
 * catatonic the call returns without doing anything.
 *
 * Takes and releases sbi->wq_mutex.
 */
void autofs_catatonic_mode(struct autofs_sb_info *sbi)
{
	struct autofs_wait_queue *wq, *nwq;

	mutex_lock(&sbi->wq_mutex);
	if (sbi->flags & AUTOFS_SBI_CATATONIC) {
		mutex_unlock(&sbi->wq_mutex);
		return;
	}

	pr_debug("entering catatonic mode\n");

	sbi->flags |= AUTOFS_SBI_CATATONIC;
	wq = sbi->queues;
	sbi->queues = NULL;	/* Erase all wait queues */
	while (wq) {
		nwq = wq->next;
		wq->status = -ENOENT; /* Magic is gone - report failure */
		/* name.name points 'offset' bytes into the allocated buffer */
		kfree(wq->name.name - wq->offset);
		wq->name.name = NULL;
		wake_up(&wq->queue);
		/*
		 * The entry has just been removed from the list, so nobody
		 * else can find it any more.  If every waiter has already
		 * dropped its count (e.g. it was killed while waiting) the
		 * entry must be freed here or it is leaked; otherwise the
		 * last waiter frees it on its way out of autofs_wait().
		 */
		if (!--wq->wait_ctr)
			kfree(wq);
		wq = nwq;
	}
	fput(sbi->pipe);	/* Close the pipe */
	sbi->pipe = NULL;
	sbi->pipefd = -1;
	mutex_unlock(&sbi->wq_mutex);
}
static int autofs_write(struct autofs_sb_info *sbi,
struct file *file, const void *addr, int bytes)
{
unsigned long sigpipe, flags;
const char *data = (const char *)addr;
ssize_t wr = 0;
sigpipe = sigismember(&current->pending.signal, SIGPIPE);
mutex_lock(&sbi->pipe_mutex);
while (bytes) {
autofs: use __kernel_write() for the autofs pipe writing autofs got broken in some configurations by commit 13c164b1a186 ("autofs: switch to kernel_write") because there is now an extra LSM permission check done by security_file_permission() in rw_verify_area(). autofs is one if the few places that really does want the much more limited __kernel_write(), because the write is an internal kernel one that shouldn't do any user permission checks (it also doesn't need the file_start_write/file_end_write logic, since it's just a pipe). There are a couple of other cases like that - accounting, core dumping, and splice - but autofs stands out because it can be built as a module. As a result, we need to export this internal __kernel_write() function again. We really don't want any other module to use this, but we don't have a "EXPORT_SYMBOL_FOR_AUTOFS_ONLY()". But we can mark it GPL-only to at least approximate that "internal use only" for licensing. While in this area, make autofs pass in NULL for the file position pointer, since it's always a pipe, and we now use a NULL file pointer for streaming file descriptors (see file_ppos() and commit 438ab720c675: "vfs: pass ppos=NULL to .read()/.write() of FMODE_STREAM files") This effectively reverts commits 9db977522449 ("fs: unexport __kernel_write") and 13c164b1a186 ("autofs: switch to kernel_write"). Fixes: 13c164b1a186 ("autofs: switch to kernel_write") Reported-by: Ondrej Mosnacek <omosnace@redhat.com> Acked-by: Christoph Hellwig <hch@lst.de> Acked-by: Acked-by: Ian Kent <raven@themaw.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-09-30 08:18:34 +08:00
wr = __kernel_write(file, data, bytes, NULL);
if (wr <= 0)
break;
data += wr;
bytes -= wr;
}
mutex_unlock(&sbi->pipe_mutex);
/* Keep the currently executing process from receiving a
* SIGPIPE unless it was already supposed to get one
*/
if (wr == -EPIPE && !sigpipe) {
spin_lock_irqsave(&current->sighand->siglock, flags);
sigdelset(&current->pending.signal, SIGPIPE);
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
/* if 'wr' returned 0 (impossible) we assume -EIO (safe) */
return bytes == 0 ? 0 : wr < 0 ? wr : -EIO;
}
/*
 * Build the request packet for @wq and send it to the daemon.
 *
 * Must be called with sbi->wq_mutex held; the mutex is released on every
 * path before returning (the write to the pipe is done without it).
 *
 * If the write fails with -ENOMEM or -ERESTARTSYS only this wait is failed
 * with that status; any other write error puts the mount into catatonic
 * mode.  An unknown @type is logged and nothing is sent.
 */
static void autofs_notify_daemon(struct autofs_sb_info *sbi,
				 struct autofs_wait_queue *wq,
				 int type)
{
	union {
		struct autofs_packet_hdr hdr;
		union autofs_packet_union v4_pkt;
		union autofs_v5_packet_union v5_pkt;
	} pkt;
	struct file *daemon_pipe = NULL;
	size_t len;
	int err;

	pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n",
		 (unsigned long) wq->wait_queue_token,
		 wq->name.len, wq->name.name, type);

	/* Zero the whole packet so no stale stack data reaches the daemon */
	memset(&pkt, 0, sizeof(pkt));

	pkt.hdr.proto_version = sbi->version;
	pkt.hdr.type = type;

	switch (type) {
	/* Protocol v4: missing request */
	case autofs_ptype_missing:
	{
		struct autofs_packet_missing *missing = &pkt.v4_pkt.missing;

		len = sizeof(*missing);

		missing->wait_queue_token = wq->wait_queue_token;
		missing->len = wq->name.len;
		memcpy(missing->name, wq->name.name, wq->name.len);
		missing->name[wq->name.len] = '\0';
		break;
	}
	/* Protocol v4: expire request */
	case autofs_ptype_expire_multi:
	{
		struct autofs_packet_expire_multi *expire =
					&pkt.v4_pkt.expire_multi;

		len = sizeof(*expire);

		expire->wait_queue_token = wq->wait_queue_token;
		expire->len = wq->name.len;
		memcpy(expire->name, wq->name.name, wq->name.len);
		expire->name[wq->name.len] = '\0';
		break;
	}
	/*
	 * Protocol v5: one packet layout covers missing and expire
	 * requests for both indirect and direct mounts.
	 */
	case autofs_ptype_missing_indirect:
	case autofs_ptype_expire_indirect:
	case autofs_ptype_missing_direct:
	case autofs_ptype_expire_direct:
	{
		struct autofs_v5_packet *v5 = &pkt.v5_pkt.v5_packet;
		struct user_namespace *ns = sbi->pipe->f_cred->user_ns;

		len = sizeof(*v5);

		v5->wait_queue_token = wq->wait_queue_token;
		v5->len = wq->name.len;
		memcpy(v5->name, wq->name.name, wq->name.len);
		v5->name[wq->name.len] = '\0';
		v5->dev = wq->dev;
		v5->ino = wq->ino;
		/* ids are mapped into the user namespace of the pipe's opener */
		v5->uid = from_kuid_munged(ns, wq->uid);
		v5->gid = from_kgid_munged(ns, wq->gid);
		v5->pid = wq->pid;
		v5->tgid = wq->tgid;
		break;
	}
	default:
		pr_warn("bad type %d!\n", type);
		mutex_unlock(&sbi->wq_mutex);
		return;
	}

	/* Hold our own reference to the pipe across the unlocked write */
	daemon_pipe = get_file(sbi->pipe);

	mutex_unlock(&sbi->wq_mutex);

	err = autofs_write(sbi, daemon_pipe, &pkt, len);
	if (err == -ENOMEM || err == -ERESTARTSYS) {
		/* Just fail this one */
		autofs_wait_release(sbi, wq->wait_queue_token, err);
	} else if (err != 0) {
		autofs_catatonic_mode(sbi);
	}
	fput(daemon_pipe);
}
/*
 * Look up the pending wait whose name matches @qstr.
 *
 * Walks sbi->queues and compares hash, length and then the name bytes.
 * Entries whose name has already been cleared (name.name == NULL) never
 * match.  Returns the matching entry, or NULL if there is none.
 */
static struct autofs_wait_queue *
autofs_find_wait(struct autofs_sb_info *sbi, const struct qstr *qstr)
{
	struct autofs_wait_queue *entry;

	for (entry = sbi->queues; entry; entry = entry->next) {
		if (entry->name.hash != qstr->hash)
			continue;
		if (entry->name.len != qstr->len)
			continue;
		if (!entry->name.name)
			continue;
		if (memcmp(entry->name.name, qstr->name, qstr->len))
			continue;
		return entry;
	}
	return NULL;
}
/*
* Check if we have a valid request.
* Returns
* 1 if the request should continue.
* In this case we can return an autofs_wait_queue entry if one is
* found or NULL to idicate a new wait needs to be created.
* 0 or a negative errno if the request shouldn't continue.
*/
static int validate_request(struct autofs_wait_queue **wait,
struct autofs_sb_info *sbi,
const struct qstr *qstr,
const struct path *path, enum autofs_notify notify)
{
struct dentry *dentry = path->dentry;
struct autofs_wait_queue *wq;
struct autofs_info *ino;
if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -ENOENT;
/* Wait in progress, continue; */
wq = autofs_find_wait(sbi, qstr);
if (wq) {
*wait = wq;
return 1;
}
*wait = NULL;
/* If we don't yet have any info this is a new request */
ino = autofs_dentry_ino(dentry);
if (!ino)
return 1;
/*
* If we've been asked to wait on an existing expire (NFY_NONE)
* but there is no wait in the queue ...
*/
if (notify == NFY_NONE) {
/*
* Either we've betean the pending expire to post it's
* wait or it finished while we waited on the mutex.
* So we need to wait till either, the wait appears
* or the expire finishes.
*/
while (ino->flags & AUTOFS_INF_EXPIRING) {
mutex_unlock(&sbi->wq_mutex);
schedule_timeout_interruptible(HZ/10);
if (mutex_lock_interruptible(&sbi->wq_mutex))
return -EINTR;
if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -ENOENT;
wq = autofs_find_wait(sbi, qstr);
if (wq) {
*wait = wq;
return 1;
}
}
/*
* Not ideal but the status has already gone. Of the two
* cases where we wait on NFY_NONE neither depend on the
* return status of the wait.
*/
return 0;
}
/*
* If we've been asked to trigger a mount and the request
* completed while we waited on the mutex ...
*/
if (notify == NFY_MOUNT) {
struct dentry *new = NULL;
struct path this;
int valid = 1;
/*
* If the dentry was successfully mounted while we slept
* on the wait queue mutex we can return success. If it
* isn't mounted (doesn't have submounts for the case of
* a multi-mount with no mount at it's base) we can
* continue on and create a new request.
*/
if (!IS_ROOT(dentry)) {
if (d_unhashed(dentry) &&
d_really_is_positive(dentry)) {
struct dentry *parent = dentry->d_parent;
new = d_lookup(parent, &dentry->d_name);
if (new)
dentry = new;
}
}
this.mnt = path->mnt;
this.dentry = dentry;
if (path_has_submounts(&this))
valid = 0;
if (new)
dput(new);
return valid;
}
return 1;
}
/*
 * Ask the daemon to act on @path and wait for its answer.
 *
 * Either joins an existing wait for the same name or creates a new wait
 * queue entry and notifies the daemon, then sleeps (killable) until the
 * entry's name is cleared by autofs_wait_release() or
 * autofs_catatonic_mode().
 *
 * Returns the status stored in the wait entry (it starts out as -EINTR
 * and is overwritten when the wait is completed), or, when no wait was
 * entered: -ENOENT (catatonic mount, pids not translatable into the
 * daemon's namespace, invalid negative dentry, path lookup failure),
 * -ENOMEM, -EINTR (interrupted while taking the mutex), or whatever
 * validate_request() returned when it was <= 0.
 */
int autofs_wait(struct autofs_sb_info *sbi,
		 const struct path *path, enum autofs_notify notify)
{
	struct dentry *dentry = path->dentry;
	struct autofs_wait_queue *wq;
	struct qstr qstr;
	char *name;
	int status, ret, type;
	unsigned int offset = 0;
	pid_t pid;
	pid_t tgid;

	/* In catatonic mode, we don't wait for nobody */
	if (sbi->flags & AUTOFS_SBI_CATATONIC)
		return -ENOENT;

	/*
	 * Try translating pids to the namespace of the daemon.
	 *
	 * Zero means failure: we are in an unrelated pid namespace.
	 */
	pid = task_pid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
	tgid = task_tgid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
	if (!pid || !tgid)
		return -ENOENT;

	/*
	 * A wait for a negative dentry is invalid for certain
	 * cases. A direct or offset mount "always" has its mount
	 * point directory created and so the request dentry must
	 * be positive or the map key doesn't exist. The situation
	 * is very similar for indirect mounts except only dentrys
	 * in the root of the autofs file system may be negative.
	 */
	if (d_really_is_negative(dentry)) {
		if (autofs_type_trigger(sbi->type) ||
		    !IS_ROOT(dentry->d_parent))
			return -ENOENT;
	}

	name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
	if (!name)
		return -ENOMEM;

	if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type)) {
		/* Direct mount request: create a dummy name */
		qstr.name = name;
		qstr.len = sprintf(name, "%p", dentry);
	} else {
		char *p = dentry_path_raw(dentry, name, NAME_MAX);

		if (IS_ERR(p)) {
			kfree(name);
			return -ENOENT;
		}
		qstr.name = ++p; // skip the leading slash
		qstr.len = strlen(p);
		/* remember where the buffer starts so it can be freed later */
		offset = p - name;
	}
	qstr.hash = full_name_hash(dentry, qstr.name, qstr.len);

	if (mutex_lock_interruptible(&sbi->wq_mutex)) {
		kfree(name);
		return -EINTR;
	}

	ret = validate_request(&wq, sbi, &qstr, path, notify);
	if (ret <= 0) {
		/* On -EINTR validate_request() returns without the mutex */
		if (ret != -EINTR)
			mutex_unlock(&sbi->wq_mutex);
		kfree(name);
		return ret;
	}

	if (wq) {
		/* Join the wait that is already in flight for this name */
		wq->wait_ctr++;

		pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n",
			 (unsigned long) wq->wait_queue_token, wq->name.len,
			 wq->name.name, notify);

		mutex_unlock(&sbi->wq_mutex);
		kfree(name);
	} else {
		/* Create a new wait queue */
		wq = kmalloc(sizeof(*wq), GFP_KERNEL);
		if (!wq) {
			kfree(name);
			mutex_unlock(&sbi->wq_mutex);
			return -ENOMEM;
		}

		/* Token 0 is never handed out: restart at 1 on wrap */
		wq->wait_queue_token = autofs_next_wait_queue;
		if (++autofs_next_wait_queue == 0)
			autofs_next_wait_queue = 1;

		wq->next = sbi->queues;
		sbi->queues = wq;
		init_waitqueue_head(&wq->queue);

		/* The entry now owns the name buffer */
		wq->name = qstr;
		wq->offset = offset;
		wq->dev = autofs_get_dev(sbi);
		wq->ino = autofs_get_ino(sbi);
		wq->uid = current_uid();
		wq->gid = current_gid();
		wq->pid = pid;
		wq->tgid = tgid;
		wq->status = -EINTR; /* Status return if interrupted */
		/* one count for the queue list, one for this waiter */
		wq->wait_ctr = 2;

		/* Pick the packet type for the protocol version in use */
		if (sbi->version < 5)
			type = (notify == NFY_MOUNT) ?
				autofs_ptype_missing :
				autofs_ptype_expire_multi;
		else if (autofs_type_trigger(sbi->type))
			type = (notify == NFY_MOUNT) ?
				autofs_ptype_missing_direct :
				autofs_ptype_expire_direct;
		else
			type = (notify == NFY_MOUNT) ?
				autofs_ptype_missing_indirect :
				autofs_ptype_expire_indirect;

		pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
			 (unsigned long) wq->wait_queue_token, wq->name.len,
			 wq->name.name, notify);

		/*
		 * autofs_notify_daemon() may block; it will unlock ->wq_mutex
		 */
		autofs_notify_daemon(sbi, wq, type);
	}

	/*
	 * wq->name.name is NULL iff the lock is already released
	 * or the mount has been made catatonic.
	 */
	wait_event_killable(wq->queue, wq->name.name == NULL);
	status = wq->status;

	/*
	 * For direct and offset mounts we need to track the requester's
	 * uid and gid in the dentry info struct. This is so it can be
	 * supplied, on request, by the misc device ioctl interface.
	 * This is needed during daemon restart when reconnecting
	 * to existing, active, autofs mounts. The uid and gid (and
	 * related string values) may be used for macro substitution
	 * in autofs mount maps.
	 */
	if (status == 0) {
		struct dentry *de = NULL;
		struct autofs_info *ino = autofs_dentry_ino(dentry);

		/* No info on this dentry: look up the dentry actually used */
		if (!ino) {
			de = d_lookup(dentry->d_parent, &dentry->d_name);
			if (de)
				ino = autofs_dentry_ino(de);
		}

		/* Set mount requester */
		if (ino) {
			spin_lock(&sbi->fs_lock);
			ino->uid = wq->uid;
			ino->gid = wq->gid;
			spin_unlock(&sbi->fs_lock);
		}

		if (de)
			dput(de);
	}

	/* Are we the last process to need status? */
	mutex_lock(&sbi->wq_mutex);
	if (!--wq->wait_ctr)
		kfree(wq);
	mutex_unlock(&sbi->wq_mutex);

	return status;
}
/*
 * Complete the wait identified by @wait_queue_token with @status.
 *
 * The entry is unlinked from sbi->queues, its name buffer is freed and the
 * name pointer cleared, and its waiters are woken.  The entry itself is
 * freed here if no waiter still holds a count on it.
 *
 * Takes and releases sbi->wq_mutex.
 *
 * Returns 0 on success or -EINVAL if no wait has that token.
 */
int autofs_wait_release(struct autofs_sb_info *sbi,
			autofs_wqt_t wait_queue_token, int status)
{
	struct autofs_wait_queue *wq, **link;
	int ret = -EINVAL;

	mutex_lock(&sbi->wq_mutex);

	/* Find the link that points at the entry carrying this token */
	link = &sbi->queues;
	while (*link && (*link)->wait_queue_token != wait_queue_token)
		link = &(*link)->next;

	wq = *link;
	if (wq) {
		*link = wq->next;	/* Unlink from chain */
		/* name.name points 'offset' bytes into the allocated buffer */
		kfree(wq->name.name - wq->offset);
		wq->name.name = NULL;	/* Do not wait on this queue */
		wq->status = status;
		wake_up(&wq->queue);
		if (!--wq->wait_ctr)
			kfree(wq);
		ret = 0;
	}

	mutex_unlock(&sbi->wq_mutex);

	return ret;
}