linux/fs/notify/fanotify/fanotify.c
Amir Goldstein 77115225ac fanotify: cache fsid in fsnotify_mark_connector
For FAN_REPORT_FID, we need to encode fid with fsid of the filesystem on
every event. To avoid having to call vfs_statfs() on every event to get
fsid, we store the fsid in fsnotify_mark_connector on the first time we
add a mark and on handle event we use the cached fsid.

Subsequent calls to add mark on the same object are expected to pass the
same fsid, so the call will fail on cached fsid mismatch.

If an event is reported on several mark types (inode, mount, filesystem),
all connectors should already have the same fsid, so we use the cached
fsid from the first connector.

[JK: Simplify code flow around fanotify_get_fid()
     make fsid argument of fsnotify_add_mark_locked() unconditional]

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
2019-02-07 16:38:35 +01:00

398 lines
11 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h> /* UINT_MAX */
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/sched/user.h>
#include <linux/sched/signal.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/audit.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
#include "fanotify.h"
static bool should_merge(struct fsnotify_event *old_fsn,
struct fsnotify_event *new_fsn)
{
struct fanotify_event *old, *new;
pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
old = FANOTIFY_E(old_fsn);
new = FANOTIFY_E(new_fsn);
if (old_fsn->inode != new_fsn->inode || old->pid != new->pid ||
old->fh_type != new->fh_type || old->fh_len != new->fh_len)
return false;
if (fanotify_event_has_path(old)) {
return old->path.mnt == new->path.mnt &&
old->path.dentry == new->path.dentry;
} else if (fanotify_event_has_fid(old)) {
return fanotify_fid_equal(&old->fid, &new->fid, old->fh_len);
}
/* Do not merge events if we failed to encode fid */
return false;
}
/* and the list better be locked by something too! */
static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
{
struct fsnotify_event *test_event;
struct fanotify_event *new;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
new = FANOTIFY_E(event);
/*
* Don't merge a permission event with any other event so that we know
* the event structure we have created in fanotify_handle_event() is the
* one we should check for permission response.
*/
if (fanotify_is_perm_event(new->mask))
return 0;
list_for_each_entry_reverse(test_event, list, list) {
if (should_merge(test_event, event)) {
FANOTIFY_E(test_event)->mask |= new->mask;
return 1;
}
}
return 0;
}
static int fanotify_get_response(struct fsnotify_group *group,
struct fanotify_perm_event *event,
struct fsnotify_iter_info *iter_info)
{
int ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
wait_event(group->fanotify_data.access_waitq, event->response);
/* userspace responded, convert to something usable */
switch (event->response & ~FAN_AUDIT) {
case FAN_ALLOW:
ret = 0;
break;
case FAN_DENY:
default:
ret = -EPERM;
}
/* Check if the response should be audited */
if (event->response & FAN_AUDIT)
audit_fanotify(event->response & ~FAN_AUDIT);
event->response = 0;
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
return ret;
}
/*
* This function returns a mask for an event that only contains the flags
* that have been specifically requested by the user. Flags that may have
* been included within the event mask, but have not been explicitly
* requested by the user, will not be present in the returned mask.
*/
static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info,
u32 event_mask, const void *data,
int data_type)
{
__u32 marks_mask = 0, marks_ignored_mask = 0;
const struct path *path = data;
struct fsnotify_mark *mark;
int type;
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
/* If we don't have enough info to send an event to userspace say no */
if (data_type != FSNOTIFY_EVENT_PATH)
return 0;
/* Sorry, fanotify only gives a damn about files and dirs */
if (!d_is_reg(path->dentry) &&
!d_can_lookup(path->dentry))
return 0;
fsnotify_foreach_obj_type(type) {
if (!fsnotify_iter_should_report_type(iter_info, type))
continue;
mark = iter_info->marks[type];
/*
* If the event is for a child and this mark doesn't care about
* events on a child, don't send it!
*/
if (event_mask & FS_EVENT_ON_CHILD &&
(type != FSNOTIFY_OBJ_TYPE_INODE ||
!(mark->mask & FS_EVENT_ON_CHILD)))
continue;
marks_mask |= mark->mask;
marks_ignored_mask |= mark->ignored_mask;
}
if (d_is_dir(path->dentry) &&
!(marks_mask & FS_ISDIR & ~marks_ignored_mask))
return 0;
return event_mask & FANOTIFY_OUTGOING_EVENTS & marks_mask &
~marks_ignored_mask;
}
static int fanotify_encode_fid(struct fanotify_event *event,
struct inode *inode, gfp_t gfp,
__kernel_fsid_t *fsid)
{
struct fanotify_fid *fid = &event->fid;
int dwords, bytes = 0;
int err, type;
fid->ext_fh = NULL;
dwords = 0;
err = -ENOENT;
type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
if (!dwords)
goto out_err;
bytes = dwords << 2;
if (bytes > FANOTIFY_INLINE_FH_LEN) {
/* Treat failure to allocate fh as failure to allocate event */
err = -ENOMEM;
fid->ext_fh = kmalloc(bytes, gfp);
if (!fid->ext_fh)
goto out_err;
}
type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes),
&dwords, NULL);
err = -EINVAL;
if (!type || type == FILEID_INVALID || bytes != dwords << 2)
goto out_err;
fid->fsid = *fsid;
event->fh_len = bytes;
return type;
out_err:
pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, "
"type=%d, bytes=%d, err=%i)\n",
fsid->val[0], fsid->val[1], type, bytes, err);
kfree(fid->ext_fh);
fid->ext_fh = NULL;
event->fh_len = 0;
return FILEID_INVALID;
}
struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
struct inode *inode, u32 mask,
const struct path *path,
__kernel_fsid_t *fsid)
{
struct fanotify_event *event = NULL;
gfp_t gfp = GFP_KERNEL_ACCOUNT;
/*
* For queues with unlimited length lost events are not expected and
* can possibly have security implications. Avoid losing events when
* memory is short.
*/
if (group->max_events == UINT_MAX)
gfp |= __GFP_NOFAIL;
/* Whoever is interested in the event, pays for the allocation. */
memalloc_use_memcg(group->memcg);
if (fanotify_is_perm_event(mask)) {
struct fanotify_perm_event *pevent;
pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
if (!pevent)
goto out;
event = &pevent->fae;
pevent->response = 0;
goto init;
}
event = kmem_cache_alloc(fanotify_event_cachep, gfp);
if (!event)
goto out;
init: __maybe_unused
fsnotify_init_event(&event->fse, inode);
event->mask = mask;
if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
event->pid = get_pid(task_pid(current));
else
event->pid = get_pid(task_tgid(current));
event->fh_len = 0;
if (path && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
/* Report the event without a file identifier on encode error */
event->fh_type = fanotify_encode_fid(event,
d_inode(path->dentry), gfp, fsid);
} else if (path) {
event->fh_type = FILEID_ROOT;
event->path = *path;
path_get(&event->path);
} else {
event->fh_type = FILEID_INVALID;
event->path.mnt = NULL;
event->path.dentry = NULL;
}
out:
memalloc_unuse_memcg();
return event;
}
/*
* Get cached fsid of the filesystem containing the object from any connector.
* All connectors are supposed to have the same fsid, but we do not verify that
* here.
*/
static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
{
int type;
__kernel_fsid_t fsid = {};
fsnotify_foreach_obj_type(type) {
if (!fsnotify_iter_should_report_type(iter_info, type))
continue;
fsid = iter_info->marks[type]->connector->fsid;
if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
continue;
return fsid;
}
return fsid;
}
static int fanotify_handle_event(struct fsnotify_group *group,
struct inode *inode,
u32 mask, const void *data, int data_type,
const unsigned char *file_name, u32 cookie,
struct fsnotify_iter_info *iter_info)
{
int ret = 0;
struct fanotify_event *event;
struct fsnotify_event *fsn_event;
__kernel_fsid_t fsid = {};
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 12);
mask = fanotify_group_event_mask(iter_info, mask, data, data_type);
if (!mask)
return 0;
pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
mask);
if (fanotify_is_perm_event(mask)) {
/*
* fsnotify_prepare_user_wait() fails if we race with mark
* deletion. Just let the operation pass in that case.
*/
if (!fsnotify_prepare_user_wait(iter_info))
return 0;
}
if (FAN_GROUP_FLAG(group, FAN_REPORT_FID))
fsid = fanotify_get_fsid(iter_info);
event = fanotify_alloc_event(group, inode, mask, data, &fsid);
ret = -ENOMEM;
if (unlikely(!event)) {
/*
* We don't queue overflow events for permission events as
* there the access is denied and so no event is in fact lost.
*/
if (!fanotify_is_perm_event(mask))
fsnotify_queue_overflow(group);
goto finish;
}
fsn_event = &event->fse;
ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
if (ret) {
/* Permission events shouldn't be merged */
BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
/* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event);
ret = 0;
} else if (fanotify_is_perm_event(mask)) {
ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event),
iter_info);
fsnotify_destroy_event(group, fsn_event);
}
finish:
if (fanotify_is_perm_event(mask))
fsnotify_finish_user_wait(iter_info);
return ret;
}
static void fanotify_free_group_priv(struct fsnotify_group *group)
{
struct user_struct *user;
user = group->fanotify_data.user;
atomic_dec(&user->fanotify_listeners);
free_uid(user);
}
static void fanotify_free_event(struct fsnotify_event *fsn_event)
{
struct fanotify_event *event;
event = FANOTIFY_E(fsn_event);
if (fanotify_event_has_path(event))
path_put(&event->path);
else if (fanotify_event_has_ext_fh(event))
kfree(event->fid.ext_fh);
put_pid(event->pid);
if (fanotify_is_perm_event(event->mask)) {
kmem_cache_free(fanotify_perm_event_cachep,
FANOTIFY_PE(fsn_event));
return;
}
kmem_cache_free(fanotify_event_cachep, event);
}
static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
{
kmem_cache_free(fanotify_mark_cache, fsn_mark);
}
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.free_group_priv = fanotify_free_group_priv,
.free_event = fanotify_free_event,
.free_mark = fanotify_free_mark,
};